diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp --- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp +++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp @@ -64,6 +64,13 @@ "regalloc-model", cl::Hidden, cl::desc("The model being trained for register allocation eviction")); +static cl::opt EnableDevelopmentFeatures( + "regalloc-enable-development-features", cl::Hidden, + cl::desc("Whether or not to enable features under development for the ML " + "regalloc advisor")); + +#else +static const bool EnableDevelopmentFeatures = false; #endif // #ifdef LLVM_HAVE_TF_API extern cl::opt EvictInterferenceCutoff; @@ -125,6 +132,22 @@ static const int64_t CandidateVirtRegPos = MaxInterferences; static const int64_t NumberOfInterferences = CandidateVirtRegPos + 1; +// The number of instructions that a specific live range might have is variable, +// but we're passing in a single matrix of instructions and tensorflow saved +// models only support a fixed input size, so we have to cap the number of +// instructions that can be passed along. The specific value was derived from +// experimentation such that the majority of eviction problems would be +// completely covered. +static const int ModelMaxSupportedInstructionCount = 300; +static const std::vector InstructionsShape{ + 1, ModelMaxSupportedInstructionCount}; +static const std::vector InstructionsMappingShape{ + 1, NumberOfInterferences, ModelMaxSupportedInstructionCount}; + +// The model can only accept a specified number of opcodes and will error if +// fed an opcode it hasn't seen before. This constant sets the current cutoff. +static const int OpcodeValueCutoff = 17716; + // Most features are as described above, so we'll reuse this vector in defining // them. 
static const std::vector PerLiveRangeShape{1, NumberOfInterferences}; @@ -193,6 +216,19 @@ "lowest stage of an interval in this LR") \ M(float, progress, {1}, "ratio of current queue size to initial size") +#ifdef LLVM_HAVE_TF_API +#define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M) \ + M(int64_t, instructions, InstructionsShape, \ + "Opcodes of the instructions covered by the eviction problem") + +#define RA_EVICT_REST_DEVELOPMENT_FEATURES(M) \ + M(int64_t, instructions_mapping, InstructionsMappingShape, \ + "A binary matrix mapping LRs to instruction opcodes") +#else +#define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M) +#define RA_EVICT_REST_DEVELOPMENT_FEATURES(M) +#endif + // The model learns to pick one of the mask == 1 interferences. This is the // name of the output tensor. The contract with the model is that the output // will be guaranteed to be to a mask == 1 position. Using a macro here to @@ -201,10 +237,13 @@ // Named features index. enum FeatureIDs { -#define _FEATURE_IDX(_, name, __, ___) name, - RA_EVICT_FEATURES_LIST(_FEATURE_IDX) +#define _FEATURE_IDX_SIMPLE(_, name, __, ___) name +#define _FEATURE_IDX(A, B, C, D) _FEATURE_IDX_SIMPLE(A, B, C, D), + RA_EVICT_FEATURES_LIST(_FEATURE_IDX) FeatureCount, + RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_FEATURE_IDX_SIMPLE) = FeatureCount, + RA_EVICT_REST_DEVELOPMENT_FEATURES(_FEATURE_IDX) FeaturesWithDevelopmentCount #undef _FEATURE_IDX - FeatureCount +#undef _FEATURE_IDX_SIMPLE }; // The ML advisor will typically have a sparse input to the evaluator, because @@ -223,7 +262,11 @@ std::memset(Runner.getTensorUntyped(FeatureIDs::NAME), 0, \ getTotalSize(SHAPE)); RA_EVICT_FEATURES_LIST(_RESET) + if (EnableDevelopmentFeatures) { + RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_RESET) + RA_EVICT_REST_DEVELOPMENT_FEATURES(_RESET) #undef _RESET + } } // Per-live interval components that get aggregated into the feature values @@ -239,6 +282,17 @@ bool IsRemat = false; }; +// LRStartEndInfo contains the start and end of a specific live range as 
+// slot indices as well as storing the index of the physical register it +// is assigned to (or 1 above the phys reg count if it's the candidate). +// Used when extracting per-instruction features in the context of a +// specific eviction problem. +struct LRStartEndInfo { + SlotIndex Begin; + SlotIndex End; + size_t Pos = 0; +}; + using CandidateRegList = std::array, NumberOfInterferences>; using FeaturesListNormalizer = @@ -272,11 +326,11 @@ /// Load the features of the given VirtReg (allocated or not) at column Pos, /// but if that can't be evicted, return false instead. - bool loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg, - bool IsHint, - const SmallVirtRegSet &FixedRegisters, - llvm::SmallVectorImpl &Largest, - size_t Pos) const; + bool + loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg, + bool IsHint, const SmallVirtRegSet &FixedRegisters, + llvm::SmallVectorImpl &Largest, size_t Pos, + SmallVectorImpl &LRPosInfo) const; private: static float getInitialQueueSize(const MachineFunction &MF); @@ -288,8 +342,11 @@ void extractFeatures(const SmallVectorImpl &Intervals, llvm::SmallVectorImpl &Largest, size_t Pos, - int64_t IsHint, int64_t LocalIntfsCount, - float NrUrgent) const; + int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent, + SmallVectorImpl &LRPosInfo) const; + + void extractInstructionFeatures( + llvm::SmallVectorImpl &LRPosInfo) const; // Point-in-time: we didn't learn this, so we always delegate to the // default. 
@@ -332,7 +389,13 @@ public: ReleaseModeEvictionAdvisorAnalysis() : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Release) { - InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; + if (EnableDevelopmentFeatures) { + InputFeatures = {RA_EVICT_FEATURES_LIST( + _DECL_FEATURES) RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_FEATURES) + RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_FEATURES)}; + } else { + InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; + } } // support for isa<> and dyn_cast. static bool classof(const RegAllocEvictionAdvisorAnalysis *R) { @@ -399,12 +462,25 @@ public: DevelopmentModeEvictionAdvisorAnalysis() : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Development) { - InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; - TrainingInputFeatures = { - RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) - TensorSpec::createSpec("action_discount", {1}), - TensorSpec::createSpec("action_step_type", {1}), - TensorSpec::createSpec("action_reward", {1})}; + if (EnableDevelopmentFeatures) { + InputFeatures = {RA_EVICT_FEATURES_LIST( + _DECL_FEATURES) RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_FEATURES) + RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_FEATURES)}; + TrainingInputFeatures = { + RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) + RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_TRAIN_FEATURES) + RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_TRAIN_FEATURES) + TensorSpec::createSpec("action_discount", {1}), + TensorSpec::createSpec("action_step_type", {1}), + TensorSpec::createSpec("action_reward", {1})}; + } else { + InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; + TrainingInputFeatures = { + RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) + TensorSpec::createSpec("action_discount", {1}), + TensorSpec::createSpec("action_step_type", {1}), + TensorSpec::createSpec("action_reward", {1})}; + } } // support for isa<> and dyn_cast. 
static bool classof(const RegAllocEvictionAdvisorAnalysis *R) { @@ -535,7 +611,8 @@ bool MLEvictAdvisor::loadInterferenceFeatures( const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, const SmallVirtRegSet &FixedRegisters, - llvm::SmallVectorImpl &Largest, size_t Pos) const { + llvm::SmallVectorImpl &Largest, size_t Pos, + llvm::SmallVectorImpl &LRPosInfo) const { // It is only possible to evict virtual register interference. if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) { // leave unavailable @@ -594,7 +671,7 @@ // OK, so if we made it this far, this LR is an eviction candidate, load its // features. extractFeatures(InterferingIntervals, Largest, Pos, IsHint, LocalIntfs, - NrUrgent); + NrUrgent, LRPosInfo); return true; } @@ -638,6 +715,7 @@ // reset all the features to 0) Use Pos to capture the column we load // features at - in AllocationOrder order. size_t Pos = 0; + SmallVector LRPosInfo; for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E; ++I, ++Pos) { MCRegister PhysReg = *I; @@ -647,7 +725,7 @@ continue; } if (loadInterferenceFeatures(VirtReg, PhysReg, I.isHint(), FixedRegisters, - Largest, Pos)) { + Largest, Pos, LRPosInfo)) { ++Available; Regs[Pos] = std::make_pair(PhysReg, true); } @@ -665,9 +743,14 @@ extractFeatures(SmallVector(1, &VirtReg), Largest, CandidateVirtRegPos, /*IsHint*/ 0, /*LocalIntfsCount*/ 0, - /*NrUrgent*/ 0.0); + /*NrUrgent*/ 0.0, LRPosInfo); assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had " "nothing to allocate initially."); +#ifdef LLVM_HAVE_TF_API + if (EnableDevelopmentFeatures) { + extractInstructionFeatures(LRPosInfo); + } +#endif // #ifdef LLVM_HAVE_TF_API // Normalize the features. for (auto &V : Largest) V = V ? 
V : 1.0; @@ -752,7 +835,8 @@ void MLEvictAdvisor::extractFeatures( const SmallVectorImpl &Intervals, llvm::SmallVectorImpl &Largest, size_t Pos, int64_t IsHint, - int64_t LocalIntfsCount, float NrUrgent) const { + int64_t LocalIntfsCount, float NrUrgent, + SmallVectorImpl &LRPosInfo) const { int64_t NrDefsAndUses = 0; int64_t NrBrokenHints = 0; double R = 0.0; @@ -799,6 +883,13 @@ HintWeights += LIFC.HintWeights; NrRematerializable += LIFC.IsRemat; + + if (EnableDevelopmentFeatures) { + for (auto CurrentSegment : LI) { + LRPosInfo.push_back( + LRStartEndInfo{CurrentSegment.start, CurrentSegment.end, Pos}); + } + } } size_t Size = 0; if (!Intervals.empty()) { @@ -843,6 +934,97 @@ // Development mode-specific implementations #ifdef LLVM_HAVE_TF_API + +void MLEvictAdvisor::extractInstructionFeatures( + SmallVectorImpl &LRPosInfo) const { + // This function extracts instruction based features relevant to the eviction + // problem currently being solved. This function ends up extracting two + // tensors. + // 1 - A vector of opcodes in sequential order of size (max instruction count) + // 2 - A binary mapping matrix of size (LR count * max instruction count) + // which maps where the LRs are live to the actual opcodes for which they are + // live. + + // Start off by sorting the segments based on the beginning slot index. + std::sort( + LRPosInfo.begin(), LRPosInfo.end(), + [](LRStartEndInfo A, LRStartEndInfo B) { return A.Begin < B.Begin; }); + size_t InstructionIndex = 0; + size_t CurrentSegmentIndex = 0; + SlotIndex CurrentIndex = LRPosInfo[0].Begin; + // This loop processes all the segments sequentially by starting at the + // beginning slot index of the first segment, iterating through all the slot + // indices before the end slot index of that segment (while checking for + // overlaps with segments that start at greater slot indices). 
After hitting + // that end index, the current segment being processed gets bumped until they + // are all processed or the max instruction count is hit, where everything is + // just truncated. + while (true) { + // If the index that we are currently at is within the current segment and + // we haven't hit the max instruction count, continue processing the current + // segment. + while (CurrentIndex <= LRPosInfo[CurrentSegmentIndex].End && + InstructionIndex < ModelMaxSupportedInstructionCount) { + auto *CurrentMachineInstruction = + LIS->getInstructionFromIndex(CurrentIndex); + // If the current machine instruction is null, skip it + if (!CurrentMachineInstruction) { + // If we're currently at the last index in the SlotIndex analysis, + // we can't go any further, so return from the function + if (CurrentIndex >= LIS->getSlotIndexes()->getLastIndex()) { + return; + } + CurrentIndex = CurrentIndex.getNextIndex(); + continue; + } + auto CurrentOpcode = CurrentMachineInstruction->getOpcode(); + Runner->getTensor(FeatureIDs::instructions)[InstructionIndex] = + CurrentOpcode < OpcodeValueCutoff ? CurrentOpcode : 0; + // set value in the binary mapping matrix for the current instruction + auto CurrentSegmentPosition = LRPosInfo[CurrentSegmentIndex].Pos; + Runner->getTensor(FeatureIDs::instructions_mapping) + [CurrentSegmentPosition * ModelMaxSupportedInstructionCount + + InstructionIndex] = 1; + // All of the segments are sorted based on the beginning slot index, but + // this doesn't mean that the beginning slot index of the next segment is + // after the end segment of the one being currently processed. This while + // loop checks for overlapping segments and modifies the portion of the + // column in the mapping matrix for the currently processed instruction + // for the LR it is checking. Also make sure that the beginning of the + // current segment we're checking for overlap in is less than the current + // index, otherwise we're done checking overlaps. 
+ size_t OverlapCheckCurrentSegment = CurrentSegmentIndex + 1; + while (OverlapCheckCurrentSegment < LRPosInfo.size() && + LRPosInfo[OverlapCheckCurrentSegment].Begin <= CurrentIndex) { + auto OverlapCurrentSegmentPosition = + LRPosInfo[OverlapCheckCurrentSegment].Pos; + Runner->getTensor(FeatureIDs::instructions_mapping) + [OverlapCurrentSegmentPosition * ModelMaxSupportedInstructionCount + + InstructionIndex] = 1; + ++OverlapCheckCurrentSegment; + } + ++InstructionIndex; + CurrentIndex = CurrentIndex.getNextIndex(); + } + // if we've just finished processing through the last segment or if we've + // hit the maximum number of instructions, break out of the loop. + if (CurrentSegmentIndex == LRPosInfo.size() - 1 || + InstructionIndex >= ModelMaxSupportedInstructionCount) { + break; + } + // just finished processing the current segment, transition to the next one + if (LRPosInfo[CurrentSegmentIndex + 1].Begin <= + LRPosInfo[CurrentSegmentIndex].End) { + // segments are overlapping. + ++CurrentSegmentIndex; + } else { + // segments are not overlapping. + CurrentIndex = LRPosInfo[CurrentSegmentIndex + 1].Begin; + ++CurrentSegmentIndex; + } + } +} + RegAllocEvictionAdvisorAnalysis *llvm::createDevelopmentModeAdvisor() { return new DevelopmentModeEvictionAdvisorAnalysis(); } @@ -872,7 +1054,10 @@ if (TrainingLog.empty()) return Ret; size_t CurrentFeature = 0; - for (; CurrentFeature < FeatureIDs::FeatureCount; ++CurrentFeature) { + size_t FeatureCount = EnableDevelopmentFeatures + ? 
FeatureIDs::FeaturesWithDevelopmentCount + : FeatureIDs::FeatureCount; + for (; CurrentFeature < FeatureCount; ++CurrentFeature) { Log->logSpecifiedTensorValue( CurrentFeature, reinterpret_cast( getRunner().getTensorUntyped(CurrentFeature))); diff --git a/llvm/test/CodeGen/MLRegalloc/dev-mode-extra-features-logging.ll b/llvm/test/CodeGen/MLRegalloc/dev-mode-extra-features-logging.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/MLRegalloc/dev-mode-extra-features-logging.ll @@ -0,0 +1,52 @@ +; REQUIRES: have_tf_api +; REQUIRES: x86_64-linux +; +; Check that we log the currently in development features correctly with both the default +; case and with a learned policy. +; +; RUN: llc -mtriple=x86_64-linux-unknown -regalloc=greedy -regalloc-enable-advisor=development \ +; RUN: -regalloc-training-log=%t1 -tfutils-text-log \ +; RUN: -regalloc-enable-development-features < %S/Inputs/input.ll +; RUN: sed -i 's/ \+/ /g' %t1 +; RUN: sed -i 's/\\n key:/\n key:/g' %t1 +; RUN: sed -i 's/\\n feature/\n feature/g' %t1 +; RUN: sed -i 's/\\n/ /g' %t1 +; RUN: FileCheck --input-file %t1 %s + +; RUN: rm -rf %t && mkdir %t +; RUN: %python %S/../../../lib/Analysis/models/gen-regalloc-eviction-test-model.py %t_savedmodel +; RUN: %python %S/../../../lib/Analysis/models/saved-model-to-tflite.py %t_savedmodel %t +; RUN: llc -mtriple=x86_64-linux-unknown -regalloc=greedy -regalloc-enable-advisor=development \ +; RUN: -regalloc-training-log=%t2 -tfutils-text-log -regalloc-model=%t \ +; RUN: -regalloc-enable-development-features < %S/Inputs/input.ll +; RUN: sed -i 's/ \+/ /g' %t2 +; RUN: sed -i 's/\\n key:/\n key:/g' %t2 +; RUN: sed -i 's/\\n feature/\n feature/g' %t2 +; RUN: sed -i 's/\\n/ /g' %t2 +; RUN: FileCheck --input-file %t2 %s + +; CHECK-NOT: nan +; CHECK-LABEL: key: \"instructions\" +; Check the first five opcodes in the first eviction problem +; CHECK-NEXT: value: 19 +; CHECK-SAME: value: 19 +; CHECK-SAME: value: 3031 +; CHECK-SAME: value: 1245 +; CHECK-SAME: value: 
1264 +; The first eviction problem is significantly less than 300 instructions. Check +; that there is a zero value +; CHECK-SAME: value: 0 +; Only the candidate virtreg and the 10th LR are included in this problem. Make +; sure the other LRs have values of zero. +; CHECK-LABEL: key: \"instructions_mapping\" +; CHECK-COUNT-2700: value: 0 +; CHECK-SAME: value: 1 +; Indexing 300 back from where the candidate vr actual resides due to the fact +; that not all the values between the 10th LR and the candidate are zero. +; CHECK-COUNT-6600: value: 0 +; CHECK-SAME: value: 1 +; Ensure that we can still go through the mapping matrices for the rest of the +; eviction problems to make sure we haven't hit the end of the matrix above. +; There are a total of 23 eviction problems with this test. +; CHECK-COUNT-22: int64_list +; CHECK: key: \"is_free\"