diff --git a/llvm/include/llvm/CodeGen/SlotIndexes.h b/llvm/include/llvm/CodeGen/SlotIndexes.h --- a/llvm/include/llvm/CodeGen/SlotIndexes.h +++ b/llvm/include/llvm/CodeGen/SlotIndexes.h @@ -108,9 +108,6 @@ PointerIntPair lie; - SlotIndex(IndexListEntry *entry, unsigned slot) - : lie(entry, slot) {} - IndexListEntry* listEntry() const { assert(isValid() && "Attempt to compare reserved index."); #ifdef EXPENSIVE_CHECKS @@ -139,6 +136,11 @@ /// Construct an invalid index. SlotIndex() = default; + // Creates a SlotIndex from an IndexListEntry and a slot. Generally should + // not be used. This method is only public to facilitate writing certain + // unit tests. + SlotIndex(IndexListEntry *entry, unsigned slot) : lie(entry, slot) {} + // Construct a new slot index from the given one, and set the slot. SlotIndex(const SlotIndex &li, Slot s) : lie(li.listEntry(), unsigned(s)) { assert(lie.getPointer() != nullptr && diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h new file mode 100644 --- /dev/null +++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h @@ -0,0 +1,72 @@ +//===- MLRegAllocEvictAdvisor.cpp - ML eviction advisor -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Function declarations of utilities related to feature extraction for unit +// testing. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MLREGALLOCEVICTIONADVISOR_H
+#define LLVM_CODEGEN_MLREGALLOCEVICTIONADVISOR_H
+
+#include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+
+using namespace llvm;
+
+// LRStartEndInfo contains the start and end of a specific live range as
+// slot indices as well as storing the index of the physical register it
+// is assigned to (or 1 above the phys reg count if it's the candidate).
+// Used when extracting per-instruction features in the context of a
+// specific eviction problem.
+struct LRStartEndInfo {
+  SlotIndex Begin;
+  SlotIndex End;
+  size_t Pos = 0;
+};
+
+void extractInstructionFeatures(
+    llvm::SmallVectorImpl<LRStartEndInfo> &LRPosInfo,
+    MLModelRunner *RegallocRunner, function_ref<int(SlotIndex)> GetOpcode,
+    const int InstructionsIndex, const int InstructionsMappingIndex,
+    const SlotIndex LastIndex);
+
+// This is the maximum number of interfering ranges. That's the number of
+// distinct AllocationOrder values, which comes from MCRegisterClass::RegsSize.
+// For X86, that's 32.
+// TODO: find a way to get this, statically, in a programmatic way.
+static const int64_t MaxInterferences = 32;
+
+// Logically, we can think of the feature set given to the evaluator as a 2D
+// matrix. The rows are the features (see next). The columns correspond to the
+// interferences. We treat the candidate virt reg as an 'interference', too, as
+// its feature set is the same as that of the interfering ranges. So we'll have
+// MaxInterferences + 1 columns and by convention, we will use the last column
+// for the virt reg seeking allocation.
+static const int64_t CandidateVirtRegPos = MaxInterferences; +static const int64_t NumberOfInterferences = CandidateVirtRegPos + 1; + +// The number of instructions that a specific live range might have is variable, +// but we're passing in a single matrix of instructions and tensorflow saved +// models only support a fixed input size, so we have to cap the number of +// instructions that can be passed along. The specific value was derived from +// experimentation such that the majority of eviction problems would be +// completely covered. +static const int ModelMaxSupportedInstructionCount = 300; + +// When extracting per-instruction features, the advisor will currently create +// a vector of size ModelMaxSupportedInstructionCount to hold the opcodes of the +// instructions relevant to the eviction problem, and a NumberOfInterferences * +// ModelMaxSupportedInstructionCount matrix that maps LRs to the instructions +// that they span. +static const std::vector InstructionsShape{ + 1, ModelMaxSupportedInstructionCount}; +static const std::vector InstructionsMappingShape{ + 1, NumberOfInterferences, ModelMaxSupportedInstructionCount}; + +#endif // LLVM_CODEGEN_MLREGALLOCEVICTIONADVISOR_H diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp --- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp +++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/NoInferenceModelRunner.h" #include "llvm/Analysis/Utils/TrainingLogger.h" #endif +#include "MLRegallocEvictAdvisor.h" #include "llvm/Analysis/ReleaseModeModelRunner.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveRegMatrix.h" @@ -64,6 +65,13 @@ "regalloc-model", cl::Hidden, cl::desc("The model being trained for register allocation eviction")); +static cl::opt EnableDevelopmentFeatures( + "regalloc-enable-development-features", cl::Hidden, + cl::desc("Whether or not to enable features under development for the ML " + 
"regalloc advisor")); + +#else +static const bool EnableDevelopmentFeatures = false; #endif // #ifdef LLVM_HAVE_TF_API extern cl::opt EvictInterferenceCutoff; @@ -110,20 +118,9 @@ // Common ML Advisor declarations // =================================== namespace { -// This is the maximum number of interfererring ranges. That's the number of -// distinct AllocationOrder values, which comes from MCRegisterClass::RegsSize. -// For X86, that's 32. -// TODO: find a way to get this, statically, in a programmatic way. -static const int64_t MaxInterferences = 32; - -// Logically, we can think of the feature set given to the evaluator as a 2D -// matrix. The rows are the features (see next). The columns correspond to the -// interferences. We treat the candidate virt reg as an 'interference', too, as -// its feature set is the same as that of the interferring ranges. So we'll have -// MaxInterferences + 1 columns and by convention, we will use the last column -// for the virt reg seeking allocation. -static const int64_t CandidateVirtRegPos = MaxInterferences; -static const int64_t NumberOfInterferences = CandidateVirtRegPos + 1; +// The model can only accept a specified number of opcodes and will error it if +// fed an opcode it hasn't seen before. This constant sets the current cutoff. +static const int OpcodeValueCutoff = 17716; // Most features are as described above, so we'll reuse this vector in defining // them. 
@@ -193,6 +190,19 @@ "lowest stage of an interval in this LR") \ M(float, progress, {1}, "ratio of current queue size to initial size") +#ifdef LLVM_HAVE_TF_API +#define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M) \ + M(int64_t, instructions, InstructionsShape, \ + "Opcodes of the instructions covered by the eviction problem") + +#define RA_EVICT_REST_DEVELOPMENT_FEATURES(M) \ + M(int64_t, instructions_mapping, InstructionsMappingShape, \ + "A binary matrix mapping LRs to instruction opcodes") +#else +#define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M) +#define RA_EVICT_REST_DEVELOPMENT_FEATURES(M) +#endif + // The model learns to pick one of the mask == 1 interferences. This is the // name of the output tensor. The contract with the model is that the output // will be guaranteed to be to a mask == 1 position. Using a macro here to @@ -201,10 +211,17 @@ // Named features index. enum FeatureIDs { -#define _FEATURE_IDX(_, name, __, ___) name, - RA_EVICT_FEATURES_LIST(_FEATURE_IDX) +#define _FEATURE_IDX_SIMPLE(_, name, __, ___) name +#define _FEATURE_IDX(A, B, C, D) _FEATURE_IDX_SIMPLE(A, B, C, D), + RA_EVICT_FEATURES_LIST(_FEATURE_IDX) FeatureCount, +#ifdef LLVM_HAVE_TF_API + RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_FEATURE_IDX_SIMPLE) = FeatureCount, +#else + RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_FEATURE_IDX) +#endif // #ifdef LLVM_HAVE_TF_API + RA_EVICT_REST_DEVELOPMENT_FEATURES(_FEATURE_IDX) FeaturesWithDevelopmentCount #undef _FEATURE_IDX - FeatureCount +#undef _FEATURE_IDX_SIMPLE }; // The ML advisor will typically have a sparse input to the evaluator, because @@ -223,7 +240,11 @@ std::memset(Runner.getTensorUntyped(FeatureIDs::NAME), 0, \ getTotalSize(SHAPE)); RA_EVICT_FEATURES_LIST(_RESET) + if (EnableDevelopmentFeatures) { + RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_RESET) + RA_EVICT_REST_DEVELOPMENT_FEATURES(_RESET) #undef _RESET + } } // Per-live interval components that get aggregated into the feature values @@ -272,11 +293,11 @@ /// Load the features of the given VirtReg 
(allocated or not) at column Pos, /// but if that can't be evicted, return false instead. - bool loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg, - bool IsHint, - const SmallVirtRegSet &FixedRegisters, - llvm::SmallVectorImpl &Largest, - size_t Pos) const; + bool + loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg, + bool IsHint, const SmallVirtRegSet &FixedRegisters, + llvm::SmallVectorImpl &Largest, size_t Pos, + SmallVectorImpl &LRPosInfo) const; private: static float getInitialQueueSize(const MachineFunction &MF); @@ -288,8 +309,8 @@ void extractFeatures(const SmallVectorImpl &Intervals, llvm::SmallVectorImpl &Largest, size_t Pos, - int64_t IsHint, int64_t LocalIntfsCount, - float NrUrgent) const; + int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent, + SmallVectorImpl &LRPosInfo) const; // Point-in-time: we didn't learn this, so we always delegate to the // default. @@ -332,7 +353,13 @@ public: ReleaseModeEvictionAdvisorAnalysis() : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Release) { - InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; + if (EnableDevelopmentFeatures) { + InputFeatures = {RA_EVICT_FEATURES_LIST( + _DECL_FEATURES) RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_FEATURES) + RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_FEATURES)}; + } else { + InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; + } } // support for isa<> and dyn_cast. 
static bool classof(const RegAllocEvictionAdvisorAnalysis *R) { @@ -399,12 +426,25 @@ public: DevelopmentModeEvictionAdvisorAnalysis() : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Development) { - InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; - TrainingInputFeatures = { - RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) - TensorSpec::createSpec("action_discount", {1}), - TensorSpec::createSpec("action_step_type", {1}), - TensorSpec::createSpec("action_reward", {1})}; + if (EnableDevelopmentFeatures) { + InputFeatures = {RA_EVICT_FEATURES_LIST( + _DECL_FEATURES) RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_FEATURES) + RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_FEATURES)}; + TrainingInputFeatures = { + RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) + RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_TRAIN_FEATURES) + RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_TRAIN_FEATURES) + TensorSpec::createSpec("action_discount", {1}), + TensorSpec::createSpec("action_step_type", {1}), + TensorSpec::createSpec("action_reward", {1})}; + } else { + InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; + TrainingInputFeatures = { + RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) + TensorSpec::createSpec("action_discount", {1}), + TensorSpec::createSpec("action_step_type", {1}), + TensorSpec::createSpec("action_reward", {1})}; + } } // support for isa<> and dyn_cast. static bool classof(const RegAllocEvictionAdvisorAnalysis *R) { @@ -535,7 +575,8 @@ bool MLEvictAdvisor::loadInterferenceFeatures( const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, const SmallVirtRegSet &FixedRegisters, - llvm::SmallVectorImpl &Largest, size_t Pos) const { + llvm::SmallVectorImpl &Largest, size_t Pos, + llvm::SmallVectorImpl &LRPosInfo) const { // It is only possible to evict virtual register interference. 
if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) { // leave unavailable @@ -594,7 +635,7 @@ // OK, so if we made it this far, this LR is an eviction candidate, load its // features. extractFeatures(InterferingIntervals, Largest, Pos, IsHint, LocalIntfs, - NrUrgent); + NrUrgent, LRPosInfo); return true; } @@ -638,6 +679,7 @@ // reset all the features to 0) Use Pos to capture the column we load // features at - in AllocationOrder order. size_t Pos = 0; + SmallVector LRPosInfo; for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E; ++I, ++Pos) { MCRegister PhysReg = *I; @@ -647,7 +689,7 @@ continue; } if (loadInterferenceFeatures(VirtReg, PhysReg, I.isHint(), FixedRegisters, - Largest, Pos)) { + Largest, Pos, LRPosInfo)) { ++Available; Regs[Pos] = std::make_pair(PhysReg, true); } @@ -665,9 +707,25 @@ extractFeatures(SmallVector(1, &VirtReg), Largest, CandidateVirtRegPos, /*IsHint*/ 0, /*LocalIntfsCount*/ 0, - /*NrUrgent*/ 0.0); + /*NrUrgent*/ 0.0, LRPosInfo); assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had " "nothing to allocate initially."); +#ifdef LLVM_HAVE_TF_API + if (EnableDevelopmentFeatures) { + extractInstructionFeatures( + LRPosInfo, Runner, + [this](SlotIndex InputIndex) -> int { + auto *CurrentMachineInstruction = + LIS->getInstructionFromIndex(InputIndex); + if (!CurrentMachineInstruction) { + return -1; + } + return CurrentMachineInstruction->getOpcode(); + }, + FeatureIDs::instructions, FeatureIDs::instructions_mapping, + LIS->getSlotIndexes()->getLastIndex()); + } +#endif // #ifdef LLVM_HAVE_TF_API // Normalize the features. for (auto &V : Largest) V = V ? 
V : 1.0; @@ -752,7 +810,8 @@ void MLEvictAdvisor::extractFeatures( const SmallVectorImpl &Intervals, llvm::SmallVectorImpl &Largest, size_t Pos, int64_t IsHint, - int64_t LocalIntfsCount, float NrUrgent) const { + int64_t LocalIntfsCount, float NrUrgent, + SmallVectorImpl &LRPosInfo) const { int64_t NrDefsAndUses = 0; int64_t NrBrokenHints = 0; double R = 0.0; @@ -799,6 +858,13 @@ HintWeights += LIFC.HintWeights; NrRematerializable += LIFC.IsRemat; + + if (EnableDevelopmentFeatures) { + for (auto CurrentSegment : LI) { + LRPosInfo.push_back( + LRStartEndInfo{CurrentSegment.start, CurrentSegment.end, Pos}); + } + } } size_t Size = 0; if (!Intervals.empty()) { @@ -841,8 +907,110 @@ #undef SET } +void extractInstructionFeatures(SmallVectorImpl &LRPosInfo, + MLModelRunner *RegallocRunner, + function_ref GetOpcode, + const int InstructionsIndex, + const int InstructionsMappingIndex, + const SlotIndex LastIndex) { + // This function extracts instruction based features relevant to the eviction + // problem currently being solved. This function ends up extracting two + // tensors. + // 1 - A vector of size max instruction count. It contains the opcodes of the + // instructions spanned by all the intervals in the current instance of the + // eviction problem. + // 2 - A binary mapping matrix of size (LR count * max + // instruction count) which maps where the LRs are live to the actual opcodes + // for which they are live. + + // Start off by sorting the segments based on the beginning slot index. 
+  std::sort(
+      LRPosInfo.begin(), LRPosInfo.end(),
+      [](LRStartEndInfo A, LRStartEndInfo B) { return A.Begin < B.Begin; });
+  size_t InstructionIndex = 0;
+  size_t CurrentSegmentIndex = 0;
+  SlotIndex CurrentIndex = LRPosInfo[0].Begin;
+  // This loop processes all the segments sequentially by starting at the
+  // beginning slot index of the first segment, iterating through all the slot
+  // indices before the end slot index of that segment (while checking for
+  // overlaps with segments that start at greater slot indices). After hitting
+  // that end index, the current segment being processed gets bumped until they
+  // are all processed or the max instruction count is hit, where everything is
+  // just truncated.
+  while (true) {
+    // If the index that we are currently at is within the current segment and
+    // we haven't hit the max instruction count, continue processing the current
+    // segment.
+    while (CurrentIndex <= LRPosInfo[CurrentSegmentIndex].End &&
+           InstructionIndex < ModelMaxSupportedInstructionCount) {
+      int CurrentOpcode = GetOpcode(CurrentIndex);
+      // If the current machine instruction is null, skip it
+      if (CurrentOpcode == -1) {
+        // If we're currently at the last index in the SlotIndex analysis,
+        // we can't go any further, so return from the function
+        if (CurrentIndex >= LastIndex) {
+          return;
+        }
+        CurrentIndex = CurrentIndex.getNextIndex();
+        continue;
+      }
+      // Current code assumes we're not going to get any disjointed segments
+      assert(LRPosInfo[CurrentSegmentIndex].Begin <= CurrentIndex);
+      RegallocRunner->getTensor<int64_t>(InstructionsIndex)[InstructionIndex] =
+          CurrentOpcode < OpcodeValueCutoff ? CurrentOpcode : 0;
+      // set value in the binary mapping matrix for the current instruction
+      auto CurrentSegmentPosition = LRPosInfo[CurrentSegmentIndex].Pos;
+      RegallocRunner->getTensor<int64_t>(
+          InstructionsMappingIndex)[CurrentSegmentPosition *
+                                        ModelMaxSupportedInstructionCount +
+                                    InstructionIndex] = 1;
+      // All of the segments are sorted based on the beginning slot index, but
+      // this doesn't mean that the beginning slot index of the next segment is
+      // after the end segment of the one being currently processed. This while
+      // loop checks for overlapping segments and modifies the portion of the
+      // column in the mapping matrix for the currently processed instruction
+      // for the LR it is checking. Also make sure that the beginning of the
+      // current segment we're checking for overlap in is less than the current
+      // index, otherwise we're done checking overlaps.
+      size_t OverlapCheckCurrentSegment = CurrentSegmentIndex + 1;
+      while (OverlapCheckCurrentSegment < LRPosInfo.size() &&
+             LRPosInfo[OverlapCheckCurrentSegment].Begin <= CurrentIndex) {
+        auto OverlapCurrentSegmentPosition =
+            LRPosInfo[OverlapCheckCurrentSegment].Pos;
+        if (LRPosInfo[OverlapCheckCurrentSegment].End >= CurrentIndex) {
+          RegallocRunner->getTensor<int64_t>(
+              InstructionsMappingIndex)[OverlapCurrentSegmentPosition *
+                                            ModelMaxSupportedInstructionCount +
+                                        InstructionIndex] = 1;
+        }
+        ++OverlapCheckCurrentSegment;
+      }
+      ++InstructionIndex;
+      if (CurrentIndex >= LastIndex) {
+        return;
+      }
+      CurrentIndex = CurrentIndex.getNextIndex();
+    }
+    // if we've just finished processing through the last segment or if we've
+    // hit the maximum number of instructions, break out of the loop.
+    if (CurrentSegmentIndex == LRPosInfo.size() - 1 ||
+        InstructionIndex >= ModelMaxSupportedInstructionCount) {
+      break;
+    }
+    // If the segments are not overlapping, we need to move to the beginning
+    // index of the next segment to avoid having instructions not attached to
+    // any register.
+ if (LRPosInfo[CurrentSegmentIndex + 1].Begin > + LRPosInfo[CurrentSegmentIndex].End) { + CurrentIndex = LRPosInfo[CurrentSegmentIndex + 1].Begin; + } + ++CurrentSegmentIndex; + } +} + // Development mode-specific implementations #ifdef LLVM_HAVE_TF_API + RegAllocEvictionAdvisorAnalysis *llvm::createDevelopmentModeAdvisor() { return new DevelopmentModeEvictionAdvisorAnalysis(); } @@ -872,7 +1040,10 @@ if (TrainingLog.empty()) return Ret; size_t CurrentFeature = 0; - for (; CurrentFeature < FeatureIDs::FeatureCount; ++CurrentFeature) { + size_t FeatureCount = EnableDevelopmentFeatures + ? FeatureIDs::FeaturesWithDevelopmentCount + : FeatureIDs::FeatureCount; + for (; CurrentFeature < FeatureCount; ++CurrentFeature) { Log->logSpecifiedTensorValue( CurrentFeature, reinterpret_cast( getRunner().getTensorUntyped(CurrentFeature))); diff --git a/llvm/test/CodeGen/MLRegalloc/dev-mode-extra-features-logging.ll b/llvm/test/CodeGen/MLRegalloc/dev-mode-extra-features-logging.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/MLRegalloc/dev-mode-extra-features-logging.ll @@ -0,0 +1,52 @@ +; REQUIRES: have_tf_api +; REQUIRES: x86_64-linux +; +; Check that we log the currently in development features correctly with both the default +; case and with a learned policy. 
+; +; RUN: llc -mtriple=x86_64-linux-unknown -regalloc=greedy -regalloc-enable-advisor=development \ +; RUN: -regalloc-training-log=%t1 -tfutils-text-log \ +; RUN: -regalloc-enable-development-features < %S/Inputs/input.ll +; RUN: sed -i 's/ \+/ /g' %t1 +; RUN: sed -i 's/\\n key:/\n key:/g' %t1 +; RUN: sed -i 's/\\n feature/\n feature/g' %t1 +; RUN: sed -i 's/\\n/ /g' %t1 +; RUN: FileCheck --input-file %t1 %s + +; RUN: rm -rf %t && mkdir %t +; RUN: %python %S/../../../lib/Analysis/models/gen-regalloc-eviction-test-model.py %t_savedmodel +; RUN: %python %S/../../../lib/Analysis/models/saved-model-to-tflite.py %t_savedmodel %t +; RUN: llc -mtriple=x86_64-linux-unknown -regalloc=greedy -regalloc-enable-advisor=development \ +; RUN: -regalloc-training-log=%t2 -tfutils-text-log -regalloc-model=%t \ +; RUN: -regalloc-enable-development-features < %S/Inputs/input.ll +; RUN: sed -i 's/ \+/ /g' %t2 +; RUN: sed -i 's/\\n key:/\n key:/g' %t2 +; RUN: sed -i 's/\\n feature/\n feature/g' %t2 +; RUN: sed -i 's/\\n/ /g' %t2 +; RUN: FileCheck --input-file %t2 %s + +; CHECK-NOT: nan +; CHECK-LABEL: key: \"instructions\" +; Check the first five opcodes in the first eviction problem +; CHECK-NEXT: value: 19 +; CHECK-SAME: value: 19 +; CHECK-SAME: value: 3031 +; CHECK-SAME: value: 1245 +; CHECK-SAME: value: 1264 +; The first eviction problem is significantly less than 300 instructions. Check +; that there is a zero value +; CHECK-SAME: value: 0 +; Only the candidate virtreg and the 10th LR are included in this problem. Make +; sure the other LRs have values of zero. +; CHECK-LABEL: key: \"instructions_mapping\" +; CHECK-COUNT-2700: value: 0 +; CHECK-SAME: value: 1 +; Indexing 300 back from where the candidate vr actual resides due to the fact +; that not all the values between the 10th LR and the candidate are zero. 
+; CHECK-COUNT-6600: value: 0 +; CHECK-SAME: value: 1 +; Ensure that we can still go through the mapping matrices for the rest of the +; eviction problems to make sure we haven't hit the end of the matrix above. +; There are a total of 23 eviction problems with this test. +; CHECK-COUNT-22: int64_list +; CHECK: key: \"is_free\" diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt --- a/llvm/unittests/CodeGen/CMakeLists.txt +++ b/llvm/unittests/CodeGen/CMakeLists.txt @@ -35,6 +35,7 @@ TypeTraitsTest.cpp TargetOptionsTest.cpp TestAsmPrinter.cpp + MLRegallocDevelopmentFeatures.cpp ) add_subdirectory(GlobalISel) diff --git a/llvm/unittests/CodeGen/MLRegallocDevelopmentFeatures.cpp b/llvm/unittests/CodeGen/MLRegallocDevelopmentFeatures.cpp new file mode 100644 --- /dev/null +++ b/llvm/unittests/CodeGen/MLRegallocDevelopmentFeatures.cpp @@ -0,0 +1,209 @@ +//===- MLRegAllocDevelopmentFeatures.cpp - test dev MLRegalloc features ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../../lib/CodeGen/MLRegallocEvictAdvisor.h" +#include "llvm/Analysis/NoInferenceModelRunner.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/CodeGen.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +#include + +using testing::ContainerEq; +using testing::Test; + +struct LRPosInfoIndexes { + size_t StartIndex; + size_t EndIndex; + size_t PhysReg; +}; + +class RegallocDevelopmentFeaturesTest : public ::Test { +protected: + SmallVector + setupOverlapProblem(const SmallVectorImpl &Segments, + ilist &IndexList) { + SmallVector PositionsToReturn; + PositionsToReturn.reserve(Segments.size()); + for (auto CurrentPosIndexInfo : Segments) { + LRStartEndInfo CurrentPosInfo = {}; + CurrentPosInfo.Pos = CurrentPosIndexInfo.PhysReg; + PositionsToReturn.push_back(CurrentPosInfo); + } + size_t CurrentSegmentIndex = 0; + size_t CurrentIndex = 0; + while (CurrentSegmentIndex < Segments.size()) { + auto *CurrentLEMem = static_cast( + Allocator.Allocate(sizeof(IndexListEntry), alignof(IndexListEntry))); + auto *CurrentListEntry = + new (CurrentLEMem) IndexListEntry(nullptr, CurrentIndex); + IndexList.push_back(CurrentListEntry); + for (size_t CurrentPosInfoIndex = 0; + CurrentPosInfoIndex < Segments.size(); ++CurrentPosInfoIndex) { + if ((CurrentIndex / SlotIndex::InstrDist) == + Segments[CurrentPosInfoIndex].StartIndex) { + PositionsToReturn[CurrentPosInfoIndex].Begin = + SlotIndex(CurrentListEntry, 0); + } else if ((CurrentIndex / SlotIndex::InstrDist) == + Segments[CurrentPosInfoIndex].EndIndex) { + PositionsToReturn[CurrentPosInfoIndex].End = + SlotIndex(CurrentListEntry, 0); + ++CurrentSegmentIndex; + } + } + CurrentIndex += SlotIndex::InstrDist; + } + return PositionsToReturn; + } + + NoInferenceModelRunner setupModelRunner() { 
+    const std::vector<TensorSpec> Inputs{
+        TensorSpec::createSpec<int64_t>("instructions", InstructionsShape),
+        TensorSpec::createSpec<int64_t>("instructions_mapping",
+                                        InstructionsMappingShape)};
+    LLVMContext Ctx;
+    return NoInferenceModelRunner(Ctx, Inputs);
+  }
+
+  std::vector<int64_t>
+  getExpectedMappingMatrix(SmallVectorImpl<LRPosInfoIndexes> &OverlapSetup) {
+    std::vector<int64_t> ExpectedMappingMatrix(
+        NumberOfInterferences * ModelMaxSupportedInstructionCount, 0);
+    for (auto NewSegment : OverlapSetup) {
+      for (size_t CurrentIndex = NewSegment.StartIndex;
+           CurrentIndex <= NewSegment.EndIndex; ++CurrentIndex) {
+        ExpectedMappingMatrix[NewSegment.PhysReg *
+                                  ModelMaxSupportedInstructionCount +
+                              CurrentIndex] = 1;
+      }
+    }
+    return ExpectedMappingMatrix;
+  }
+
+  void runOverlapTest(SmallVectorImpl<LRPosInfoIndexes> &OverlapSetup) {
+    ilist<IndexListEntry> IndexList;
+    auto OverlapProblem = setupOverlapProblem(OverlapSetup, IndexList);
+    NoInferenceModelRunner ModelRunner = setupModelRunner();
+    size_t MaxIndex = 0;
+    for (size_t CurrentOverlap = 0; CurrentOverlap < OverlapSetup.size();
+         ++CurrentOverlap) {
+      if (OverlapSetup[CurrentOverlap].EndIndex >
+          OverlapSetup[MaxIndex].EndIndex) {
+        MaxIndex = CurrentOverlap;
+      }
+    }
+    SlotIndex LastIndex = OverlapProblem[MaxIndex].End;
+    extractInstructionFeatures(
+        OverlapProblem, &ModelRunner,
+        [](SlotIndex InputSlot) -> int { return 0; }, 0, 1, LastIndex);
+    std::vector<int64_t> MappingMatrix(
+        ModelRunner.getTensor<int64_t>(1),
+        ModelRunner.getTensor<int64_t>(1) +
+            NumberOfInterferences * ModelMaxSupportedInstructionCount);
+    ASSERT_THAT(MappingMatrix,
+                ContainerEq(getExpectedMappingMatrix(OverlapSetup)));
+    IndexList.clearAndLeakNodesUnsafely();
+  }
+
+  BumpPtrAllocator Allocator;
+};
+
+// meta tests to ensure that test setup works correctly
+
+TEST_F(RegallocDevelopmentFeaturesTest,
+       MetaOverlapInstructionDistancesAreCorrect) {
+  SmallVector<LRPosInfoIndexes, 2> OverlapSetup;
+  OverlapSetup.push_back({0, 5, 0});
+  OverlapSetup.push_back({5, 10, 0});
+  ilist<IndexListEntry> IndexList;
+  auto OverlapProblem = setupOverlapProblem(OverlapSetup, IndexList);
+
ASSERT_EQ(OverlapProblem[0].End.distance(OverlapProblem[1].End), + 5 * SlotIndex::InstrDist); + ASSERT_EQ(OverlapProblem[0].End.distance(OverlapProblem[1].Begin), 0); +} + +TEST_F(RegallocDevelopmentFeaturesTest, MetaSlotIndicesAreValid) { + SmallVector OverlapSetup; + OverlapSetup.push_back({0, 10, 0}); + ilist IndexList; + auto OverlapProblem = setupOverlapProblem(OverlapSetup, IndexList); + ASSERT_TRUE(OverlapProblem[0].Begin.isValid()); + ASSERT_TRUE(OverlapProblem[0].End.isValid()); +} + +// Testing of feature extraction for per-instruction features + +TEST_F(RegallocDevelopmentFeaturesTest, InstructionOpcodesAreCorrect) { + SmallVector OverlapSetup; + OverlapSetup.push_back({0, ModelMaxSupportedInstructionCount - 1, 0}); + ilist IndexList; + auto OverlapProblem = setupOverlapProblem(OverlapSetup, IndexList); + NoInferenceModelRunner ModelRunner = setupModelRunner(); + SlotIndex LastIndex = OverlapProblem[0].End; + SlotIndex FirstIndex = OverlapProblem[0].Begin; + extractInstructionFeatures( + OverlapProblem, &ModelRunner, + [FirstIndex](SlotIndex InputSlot) -> int { + return FirstIndex.distance(InputSlot) / SlotIndex::InstrDist; + }, + 0, 1, LastIndex); + for (size_t CurrentInstructionIndex = 0; + CurrentInstructionIndex < ModelMaxSupportedInstructionCount; + ++CurrentInstructionIndex) { + ASSERT_EQ( + (size_t)ModelRunner.getTensor(0)[CurrentInstructionIndex], + CurrentInstructionIndex); + } +} + +TEST_F(RegallocDevelopmentFeaturesTest, FullOverlap) { + SmallVector OverlapSetup; + OverlapSetup.push_back({0, ModelMaxSupportedInstructionCount - 1, 0}); + OverlapSetup.push_back({0, ModelMaxSupportedInstructionCount - 1, 1}); + runOverlapTest(OverlapSetup); +} + +TEST_F(RegallocDevelopmentFeaturesTest, PartialOverlap) { + SmallVector OverlapSetup; + OverlapSetup.push_back({0, 20, 0}); + OverlapSetup.push_back({15, 30, 1}); + runOverlapTest(OverlapSetup); +} + +TEST_F(RegallocDevelopmentFeaturesTest, PartialOverlapOpposite) { + SmallVector OverlapSetup; + 
OverlapSetup.push_back({15, 30, 1});
+  OverlapSetup.push_back({0, 20, 0});
+  runOverlapTest(OverlapSetup);
+}
+
+TEST_F(RegallocDevelopmentFeaturesTest, InternalOverlap) {
+  SmallVector<LRPosInfoIndexes, 2> OverlapSetup;
+  OverlapSetup.push_back({0, 30, 0});
+  OverlapSetup.push_back({10, 20, 1});
+  runOverlapTest(OverlapSetup);
+}
+
+TEST_F(RegallocDevelopmentFeaturesTest, TripleInternalOverlap) {
+  SmallVector<LRPosInfoIndexes, 2> OverlapSetup;
+  OverlapSetup.push_back({0, 30, 0});
+  OverlapSetup.push_back({10, 25, 1});
+  OverlapSetup.push_back({15, 20, 2});
+  runOverlapTest(OverlapSetup);
+}
+
+TEST_F(RegallocDevelopmentFeaturesTest, InternalMultiOverlap) {
+  SmallVector<LRPosInfoIndexes, 2> OverlapSetup;
+  OverlapSetup.push_back({0, 45, 0});
+  OverlapSetup.push_back({30, 40, 1});
+  OverlapSetup.push_back({35, 60, 2});
+  runOverlapTest(OverlapSetup);
+}