diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp --- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp +++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp @@ -64,6 +64,13 @@ "regalloc-model", cl::Hidden, cl::desc("The model being trained for register allocation eviction")); +static cl::opt<bool> EnableDevelopmentFeatures( + "regalloc-enable-development-features", cl::Hidden, + cl::desc("Whether or not to enable features under development for the ML " + "regalloc advisor")); + +#else +static const bool EnableDevelopmentFeatures = false; #endif // #ifdef LLVM_HAVE_TF_API extern cl::opt<unsigned> EvictInterferenceCutoff; @@ -125,6 +132,22 @@ static const int64_t CandidateVirtRegPos = MaxInterferences; static const int64_t NumberOfInterferences = CandidateVirtRegPos + 1; +// The number of instructions that a specific live range might have is variable, +// but we're passing in a single matrix of instructions and tensorflow saved +// models only support a fixed input size, so we have to cap the number of +// instructions that can be passed along. The specific value was derived from +// experimentation such that the majority of eviction problems would be +// completely covered. +static const int ModelMaxSupportedInstructionCount = 300; +static const std::vector<int64_t> InstructionsShape{ + 1, ModelMaxSupportedInstructionCount}; +static const std::vector<int64_t> InstructionsMappingShape{ + 1, NumberOfInterferences, ModelMaxSupportedInstructionCount}; + +// The model can only accept a specified number of opcodes and will error if +// fed an opcode it hasn't seen before. This constant sets the current cutoff. +static const int OpcodeCountCutoff = 17716; + // Most features are as described above, so we'll reuse this vector in defining // them. 
static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences}; @@ -193,6 +216,16 @@ "lowest stage of an interval in this LR") \ M(float, progress, {1}, "ratio of current queue size to initial size") +#ifdef LLVM_HAVE_TF_API +#define RA_EVICT_FEATURES_UNDER_DEVELOPMENT_LIST(M) \ + M(int64_t, instructions, InstructionsShape, \ + "Opcodes of the instructions covered by the eviction problem") \ + M(int64_t, instructions_mapping, InstructionsMappingShape, \ + "A binary matrix mapping LRs to instruction opcodes") +#else +#define RA_EVICT_FEATURES_UNDER_DEVELOPMENT_LIST(M) +#endif + // The model learns to pick one of the mask == 1 interferences. This is the // name of the output tensor. The contract with the model is that the output // will be guaranteed to be to a mask == 1 position. Using a macro here to @@ -202,9 +235,10 @@ // Named features index. enum FeatureIDs { #define _FEATURE_IDX(_, name, __, ___) name, - RA_EVICT_FEATURES_LIST(_FEATURE_IDX) + RA_EVICT_FEATURES_LIST(_FEATURE_IDX) FeatureCount, + RA_EVICT_FEATURES_UNDER_DEVELOPMENT_LIST(_FEATURE_IDX) + FeaturesWithDevelopmentCount #undef _FEATURE_IDX - FeatureCount }; // The ML advisor will typically have a sparse input to the evaluator, because @@ -224,6 +258,15 @@ getTotalSize(SHAPE)); RA_EVICT_FEATURES_LIST(_RESET) #undef _RESET + // when resetting the features under development we need to make sure to skip + // FeatureIDs::FeatureCount + if (EnableDevelopmentFeatures) { +#define _RESET_DEV(TYPE, NAME, SHAPE, __) \ + std::memset(Runner.getTensorUntyped(FeatureIDs::NAME - 1), 0, \ + getTotalSize(SHAPE)); + RA_EVICT_FEATURES_UNDER_DEVELOPMENT_LIST(_RESET_DEV) + } +#undef _RESET_DEV } // Per-live interval components that get aggregated into the feature values @@ -239,6 +282,12 @@ bool IsRemat = false; }; +struct LRStartEndInfo { + SlotIndex Begin; + SlotIndex End; + size_t Pos; +}; + using CandidateRegList = std::array<std::pair<MCRegister, bool>, NumberOfInterferences>; using FeaturesListNormalizer = @@ -272,11 +321,11 @@ /// Load 
the features of the given VirtReg (allocated or not) at column Pos, /// but if that can't be evicted, return false instead. - bool loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg, - bool IsHint, - const SmallVirtRegSet &FixedRegisters, - llvm::SmallVectorImpl<float> &Largest, - size_t Pos) const; + bool + loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg, + bool IsHint, const SmallVirtRegSet &FixedRegisters, + llvm::SmallVectorImpl<float> &Largest, size_t Pos, + SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const; private: static float getInitialQueueSize(const MachineFunction &MF); @@ -288,8 +337,11 @@ void extractFeatures(const SmallVectorImpl<const LiveInterval *> &Intervals, llvm::SmallVectorImpl<float> &Largest, size_t Pos, - int64_t IsHint, int64_t LocalIntfsCount, - float NrUrgent) const; + int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent, + SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const; + + void extractInstructionFeatures( + llvm::SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const; // Point-in-time: we didn't learn this, so we always delegate to the // default. @@ -332,7 +384,13 @@ public: ReleaseModeEvictionAdvisorAnalysis() : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Release) { - InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; + if (EnableDevelopmentFeatures) { + InputFeatures = { + RA_EVICT_FEATURES_LIST(_DECL_FEATURES) + RA_EVICT_FEATURES_UNDER_DEVELOPMENT_LIST(_DECL_FEATURES)}; + } else { + InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; + } } // support for isa<> and dyn_cast. 
static bool classof(const RegAllocEvictionAdvisorAnalysis *R) { @@ -399,12 +457,24 @@ public: DevelopmentModeEvictionAdvisorAnalysis() : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Development) { - InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; - TrainingInputFeatures = { - RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) - TensorSpec::createSpec<float>("action_discount", {1}), - TensorSpec::createSpec<int32_t>("action_step_type", {1}), - TensorSpec::createSpec<float>("action_reward", {1})}; + if (EnableDevelopmentFeatures) { + InputFeatures = { + RA_EVICT_FEATURES_LIST(_DECL_FEATURES) + RA_EVICT_FEATURES_UNDER_DEVELOPMENT_LIST(_DECL_FEATURES)}; + TrainingInputFeatures = { + RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) + RA_EVICT_FEATURES_UNDER_DEVELOPMENT_LIST(_DECL_FEATURES) + TensorSpec::createSpec<float>("action_discount", {1}), + TensorSpec::createSpec<int32_t>("action_step_type", {1}), + TensorSpec::createSpec<float>("action_reward", {1})}; + } else { + InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; + TrainingInputFeatures = { + RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) + TensorSpec::createSpec<float>("action_discount", {1}), + TensorSpec::createSpec<int32_t>("action_step_type", {1}), + TensorSpec::createSpec<float>("action_reward", {1})}; + } } // support for isa<> and dyn_cast. static bool classof(const RegAllocEvictionAdvisorAnalysis *R) { @@ -535,7 +605,8 @@ bool MLEvictAdvisor::loadInterferenceFeatures( const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, const SmallVirtRegSet &FixedRegisters, - llvm::SmallVectorImpl<float> &Largest, size_t Pos) const { + llvm::SmallVectorImpl<float> &Largest, size_t Pos, + llvm::SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const { // It is only possible to evict virtual register interference. if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) { // leave unavailable @@ -594,7 +665,7 @@ // OK, so if we made it this far, this LR is an eviction candidate, load its // features. 
extractFeatures(InterferingIntervals, Largest, Pos, IsHint, LocalIntfs, - NrUrgent); + NrUrgent, LRPosInfo); return true; } @@ -638,6 +709,7 @@ // reset all the features to 0) Use Pos to capture the column we load // features at - in AllocationOrder order. size_t Pos = 0; + SmallVector<LRStartEndInfo> LRPosInfo; for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E; ++I, ++Pos) { MCRegister PhysReg = *I; @@ -647,7 +719,7 @@ continue; } if (loadInterferenceFeatures(VirtReg, PhysReg, I.isHint(), FixedRegisters, - Largest, Pos)) { + Largest, Pos, LRPosInfo)) { ++Available; Regs[Pos] = std::make_pair(PhysReg, true); } @@ -665,9 +737,14 @@ extractFeatures(SmallVector<const LiveInterval *, 1>(1, &VirtReg), Largest, CandidateVirtRegPos, /*IsHint*/ 0, /*LocalIntfsCount*/ 0, - /*NrUrgent*/ 0.0); + /*NrUrgent*/ 0.0, LRPosInfo); assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had " "nothing to allocate initially."); +#ifdef LLVM_HAVE_TF_API + if (EnableDevelopmentFeatures) { + extractInstructionFeatures(LRPosInfo); + } +#endif // #ifdef LLVM_HAVE_TF_API // Normalize the features. for (auto &V : Largest) V = V ? 
V : 1.0; @@ -752,7 +829,8 @@ void MLEvictAdvisor::extractFeatures( const SmallVectorImpl<const LiveInterval *> &Intervals, llvm::SmallVectorImpl<float> &Largest, size_t Pos, int64_t IsHint, - int64_t LocalIntfsCount, float NrUrgent) const { + int64_t LocalIntfsCount, float NrUrgent, + SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const { int64_t NrDefsAndUses = 0; int64_t NrBrokenHints = 0; double R = 0.0; @@ -799,6 +877,13 @@ HintWeights += LIFC.HintWeights; NrRematerializable += LIFC.IsRemat; + + if (EnableDevelopmentFeatures) { + for (auto CurrentSegment : LI) { + LRPosInfo.push_back( + LRStartEndInfo{CurrentSegment.start, CurrentSegment.end, Pos}); + } + } } size_t Size = 0; if (!Intervals.empty()) { @@ -843,6 +928,103 @@ // Development mode-specific implementations #ifdef LLVM_HAVE_TF_API + +void MLEvictAdvisor::extractInstructionFeatures( + SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const { + // This function extracts instruction based features relevant to the eviction + // problem currently being solved. This function ends up extracting two + // tensors. + // 1 - A vector of opcodes in sequential order of size (max instruction count) + // 2 - A binary mapping matrix of size (LR count * max instruction count) + // which maps where the LRs are live to the actual opcodes for which they are + // live. + + // Start off by sorting the segments based on the beginning slot index. + std::sort( + LRPosInfo.begin(), LRPosInfo.end(), + [](LRStartEndInfo A, LRStartEndInfo B) { return A.Begin < B.Begin; }); + size_t InstructionCount = 0; + size_t CurrentSegment = 0; + SlotIndex CurrentIndex = LRPosInfo[0].Begin; + // This loop processes all the segments sequentially by starting at the + // beginning slot index of the first segment, iterating through all the slot + // indices before the end slot index of that segment (while checking for + // overlaps with segments that start at greater slot indices). 
After hitting + // that end index, the current segment being processed gets bumped until they + // are all processed or the max instruction count is hit, where everything is + // just truncated. + while (true) { + // If the index that we are currently at is within the current segment and + // we haven't hit the max instruction count, continue processing the current + // segment. + while (CurrentIndex <= LRPosInfo[CurrentSegment].End && + InstructionCount < ModelMaxSupportedInstructionCount) { + auto *CurrentMachineInstruction = + LIS->getInstructionFromIndex(CurrentIndex); + // If the current machine instruction is null, skip it + if (!CurrentMachineInstruction) { + // If we're currently at the last index in the SlotIndex analysis, + // we can't go any further, so return from the function + if (CurrentIndex >= LIS->getSlotIndexes()->getLastIndex()) { + return; + } + CurrentIndex = CurrentIndex.getNextIndex(); + continue; + } + auto CurrentOpcode = CurrentMachineInstruction->getOpcode(); + Runner->getTensor<int64_t>(FeatureIDs::instructions - + 1)[InstructionCount] = + CurrentOpcode < OpcodeCountCutoff ? CurrentOpcode : 0; + // set value in the binary mapping matrix for the current instruction + auto CurrentSegmentPosition = LRPosInfo[CurrentSegment].Pos; + Runner->getTensor<int64_t>( + FeatureIDs::instructions_mapping - + 1)[CurrentSegmentPosition * ModelMaxSupportedInstructionCount + + InstructionCount] = 1; + // All of the segments are sorted based on the beginning slot index, but + // this doesn't mean that the beginning slot index of the next segment is + // after the end segment of the one being currently processed. This while + // loop checks for overlapping segments and modifies the portion of the + // column in the mapping matrix for the currently processed instruction + // for the LR it is checking. 
+ size_t OverlapCheckCurrentSegment = CurrentSegment + 1; + while (OverlapCheckCurrentSegment < LRPosInfo.size()) { + // If the beginning of the segment being currently checked is greater + // than the index being currently processed, this segment and all future + // segments are guaranteed to not overlap, so we can break out of the + // loop. + if (LRPosInfo[OverlapCheckCurrentSegment].Begin > CurrentIndex) { + break; + } + auto OverlapCurrentSegmentPosition = + LRPosInfo[OverlapCheckCurrentSegment].Pos; + Runner->getTensor<int64_t>(FeatureIDs::instructions_mapping - + 1)[OverlapCurrentSegmentPosition * + ModelMaxSupportedInstructionCount + + InstructionCount] = 1; + ++OverlapCheckCurrentSegment; + } + ++InstructionCount; + CurrentIndex = CurrentIndex.getNextIndex(); + } + // if we've just finished processing through the last segment or if we've + // hit the maximum number of instructions, break out of the loop. + if (CurrentSegment == LRPosInfo.size() - 1 || + InstructionCount >= ModelMaxSupportedInstructionCount) { + break; + } + // just finished processing the current segment, transition to the next one + if (LRPosInfo[CurrentSegment + 1].Begin <= LRPosInfo[CurrentSegment].End) { + // segments are overlapping. + ++CurrentSegment; + } else { + // segments are not overlapping. 
+ CurrentIndex = LRPosInfo[CurrentSegment + 1].Begin; + ++CurrentSegment; + } + } +} + RegAllocEvictionAdvisorAnalysis *llvm::createDevelopmentModeAdvisor() { return new DevelopmentModeEvictionAdvisorAnalysis(); } @@ -877,6 +1059,18 @@ CurrentFeature, reinterpret_cast<const char *>( getRunner().getTensorUntyped(CurrentFeature))); } + if (EnableDevelopmentFeatures) { + // Continue where the previous loop left off, but only iterate up to one less + // than the development feature count as the non development feature count + // takes up one index in the FeatureIDs enum but not in the runner's list + // of buffers + for (; CurrentFeature < FeatureIDs::FeaturesWithDevelopmentCount - 1; + ++CurrentFeature) { + Log->logSpecifiedTensorValue( + CurrentFeature, reinterpret_cast<const char *>( + getRunner().getTensorUntyped(CurrentFeature))); + } + } + if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(&getRunner())) for (size_t I = 1; I < MUTR->outputLoggedFeatureSpecs().size(); ++I, ++CurrentFeature) diff --git a/llvm/test/CodeGen/MLRegalloc/dev-mode-extra-features-logging.ll b/llvm/test/CodeGen/MLRegalloc/dev-mode-extra-features-logging.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/MLRegalloc/dev-mode-extra-features-logging.ll @@ -0,0 +1,51 @@ +; REQUIRES: have_tf_api +; REQUIRES: x86_64-linux +; +; Check that we log the currently in development features correctly with both the default +; case and with a learned policy. 
+; +; RUN: llc -mtriple=x86_64-linux-unknown -regalloc=greedy -regalloc-enable-advisor=development \ +; RUN: -regalloc-training-log=%t1 -tfutils-text-log \ +; RUN: -regalloc-enable-development-features < %S/Inputs/input.ll +; RUN: sed -i 's/ \+/ /g' %t1 +; RUN: sed -i 's/\\n key:/\n key:/g' %t1 +; RUN: sed -i 's/\\n feature/\n feature/g' %t1 +; RUN: sed -i 's/\\n/ /g' %t1 +; RUN: FileCheck --input-file %t1 %s + +; RUN: rm -rf %t && mkdir %t +; RUN: %python %S/../../../lib/Analysis/models/gen-regalloc-eviction-test-model.py %t +; RUN: llc -mtriple=x86_64-linux-unknown -regalloc=greedy -regalloc-enable-advisor=development \ +; RUN: -regalloc-training-log=%t2 -tfutils-text-log -regalloc-model=%t \ +; RUN: -regalloc-enable-development-features < %S/Inputs/input.ll +; RUN: sed -i 's/ \+/ /g' %t2 +; RUN: sed -i 's/\\n key:/\n key:/g' %t2 +; RUN: sed -i 's/\\n feature/\n feature/g' %t2 +; RUN: sed -i 's/\\n/ /g' %t2 +; RUN: FileCheck --input-file %t2 %s + +; CHECK-NOT: nan +; CHECK-LABEL: key: \"instructions\" +; Check the first five opcodes in the first eviction problem +; CHECK-NEXT: value: 19 +; CHECK-SAME: value: 19 +; CHECK-SAME: value: 3030 +; CHECK-SAME: value: 1245 +; CHECK-SAME: value: 1264 +; The first eviction problem is significantly less than 300 instructions. Check +; that there is a zero value +; CHECK-SAME: value: 0 +; Only the candidate virtreg and the 10th LR are included in this problem. Make +; sure the other LRs have values of zero. +; CHECK-LABEL: key: \"instructions_mapping\" +; CHECK-COUNT-2700: value: 0 +; CHECK-SAME: value: 1 +; Indexing 300 back from where the candidate vr actual resides due to the fact +; that not all the values between the 10th LR and the candidate are zero. +; CHECK-COUNT-6600: value: 0 +; CHECK-SAME: value: 1 +; Ensure that we can still go through the mapping matrices for the rest of the +; eviction problems to make sure we haven't hit the end of the matrix above. +; There are a total of 23 eviction problems with this test. 
+; CHECK-COUNT-22: int64_list +; CHECK: key: \"is_free\"