diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
--- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
+++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
@@ -64,6 +64,13 @@
     "regalloc-model", cl::Hidden,
     cl::desc("The model being trained for register allocation eviction"));
 
+static cl::opt<bool> EnableDevelopmentFeatures(
+    "regalloc-enable-development-features", cl::Hidden,
+    cl::desc("Whether or not to enable features under development for the ML "
+             "regalloc advisor"));
+
+#else
+static const bool EnableDevelopmentFeatures = false;
 #endif // #ifdef LLVM_HAVE_TF_API
 
 extern cl::opt<unsigned> EvictInterferenceCutoff;
@@ -125,6 +132,22 @@
 static const int64_t CandidateVirtRegPos = MaxInterferences;
 static const int64_t NumberOfInterferences = CandidateVirtRegPos + 1;
 
+// The number of instructions that a specific live range might have is variable,
+// but we're passing in a single matrix of instructions and tensorflow saved
+// models only support a fixed input size, so we have to cap the number of
+// instructions that can be passed along. The specific value was derived from
+// experimentation such that the majority of eviction problems would be
+// completely covered.
+static const int ModelMaxSupportedInstructionCount = 300;
+static const std::vector<int64_t> InstructionsShape{
+    1, ModelMaxSupportedInstructionCount};
+static const std::vector<int64_t> InstructionsMappingShape{
+    1, NumberOfInterferences, ModelMaxSupportedInstructionCount};
+
+// The model can only accept a specified number of opcodes and will error it if
+// fed an opcode it hasn't seen before. This constant sets the current cutoff.
+static const int OpcodeCountCutoff = 17716;
+
 // Most features are as described above, so we'll reuse this vector in defining
 // them.
 static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences};
@@ -193,6 +216,16 @@
           "lowest stage of an interval in this LR")                            \
   M(float, progress, {1}, "ratio of current queue size to initial size")
 
+#ifdef LLVM_HAVE_TF_API
+#define RA_EVICT_FEATURES_UNDER_DEVELOPMENT_LIST(M)                            \
+  M(int64_t, instructions, InstructionsShape,                                  \
+    "Opcodes of the instructions covered by the eviction problem")             \
+  M(int64_t, instructions_mapping, InstructionsMappingShape,                   \
+    "A binary matrix mapping LRs to instruction opcodes")
+#else
+#define RA_EVICT_FEATURES_UNDER_DEVELOPMENT_LIST(M)
+#endif
+
 // The model learns to pick one of the mask == 1 interferences. This is the
 // name of the output tensor. The contract with the model is that the output
 // will be guaranteed to be to a mask == 1 position. Using a macro here to
@@ -202,9 +235,10 @@
 // Named features index.
 enum FeatureIDs {
 #define _FEATURE_IDX(_, name, __, ___) name,
-  RA_EVICT_FEATURES_LIST(_FEATURE_IDX)
+  RA_EVICT_FEATURES_LIST(_FEATURE_IDX) FeatureCount,
+  RA_EVICT_FEATURES_UNDER_DEVELOPMENT_LIST(_FEATURE_IDX)
+      FeaturesWithDevelopmentCount
 #undef _FEATURE_IDX
-  FeatureCount
 };
 
 // The ML advisor will typically have a sparse input to the evaluator, because
@@ -224,6 +258,15 @@
               getTotalSize<TYPE>(SHAPE));
   RA_EVICT_FEATURES_LIST(_RESET)
 #undef _RESET
+  // when resetting the features under development we need to make sure to skip
+  // FeatureIDs::FeatureCount
+  if (EnableDevelopmentFeatures) {
+#define _RESET_DEV(TYPE, NAME, SHAPE, __)                                      \
+  std::memset(Runner.getTensorUntyped(FeatureIDs::NAME - 1), 0,                \
+              getTotalSize<TYPE>(SHAPE));
+    RA_EVICT_FEATURES_UNDER_DEVELOPMENT_LIST(_RESET_DEV)
+  }
+#undef _RESET_DEV
 }
 
 // Per-live interval components that get aggregated into the feature values
@@ -272,11 +315,12 @@
   /// Load the features of the given VirtReg (allocated or not) at column Pos,
   /// but if that can't be evicted, return false instead.
-  bool loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg,
-                                bool IsHint,
-                                const SmallVirtRegSet &FixedRegisters,
-                                llvm::SmallVectorImpl<float> &Largest,
-                                size_t Pos) const;
+  bool loadInterferenceFeatures(
+      const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
+      const SmallVirtRegSet &FixedRegisters,
+      llvm::SmallVectorImpl<float> &Largest, size_t Pos,
+      SmallVectorImpl<std::tuple<SlotIndex, SlotIndex, size_t>> &LRPosInfo)
+      const;
 
 private:
   static float getInitialQueueSize(const MachineFunction &MF);
@@ -288,8 +332,13 @@
   void extractFeatures(const SmallVectorImpl<const LiveInterval *> &Intervals,
                        llvm::SmallVectorImpl<float> &Largest, size_t Pos,
-                       int64_t IsHint, int64_t LocalIntfsCount,
-                       float NrUrgent) const;
+                       int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent,
+                       SmallVectorImpl<std::tuple<SlotIndex, SlotIndex, size_t>>
+                           &LRPosInfo) const;
+
+  void extractInstructionFeatures(
+      llvm::SmallVectorImpl<std::tuple<SlotIndex, SlotIndex, size_t>>
+          &LRPosInfo) const;
 
   // Point-in-time: we didn't learn this, so we always delegate to the
   // default.
@@ -332,7 +381,13 @@
 public:
   ReleaseModeEvictionAdvisorAnalysis()
       : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Release) {
-    InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)};
+    if (EnableDevelopmentFeatures) {
+      InputFeatures = {
+          RA_EVICT_FEATURES_LIST(_DECL_FEATURES)
+              RA_EVICT_FEATURES_UNDER_DEVELOPMENT_LIST(_DECL_FEATURES)};
+    } else {
+      InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)};
+    }
   }
 
   // support for isa<> and dyn_cast.
   static bool classof(const RegAllocEvictionAdvisorAnalysis *R) {
@@ -399,12 +454,24 @@
 public:
   DevelopmentModeEvictionAdvisorAnalysis()
       : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Development) {
-    InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)};
-    TrainingInputFeatures = {
-        RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES)
-        TensorSpec::createSpec<float>("action_discount", {1}),
-        TensorSpec::createSpec<int32_t>("action_step_type", {1}),
-        TensorSpec::createSpec<float>("action_reward", {1})};
+    if (EnableDevelopmentFeatures) {
+      InputFeatures = {
+          RA_EVICT_FEATURES_LIST(_DECL_FEATURES)
+              RA_EVICT_FEATURES_UNDER_DEVELOPMENT_LIST(_DECL_FEATURES)};
+      TrainingInputFeatures = {
+          RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES)
+              RA_EVICT_FEATURES_UNDER_DEVELOPMENT_LIST(_DECL_FEATURES)
+                  TensorSpec::createSpec<float>("action_discount", {1}),
+          TensorSpec::createSpec<int32_t>("action_step_type", {1}),
+          TensorSpec::createSpec<float>("action_reward", {1})};
+    } else {
+      InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)};
+      TrainingInputFeatures = {
+          RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES)
+          TensorSpec::createSpec<float>("action_discount", {1}),
+          TensorSpec::createSpec<int32_t>("action_step_type", {1}),
+          TensorSpec::createSpec<float>("action_reward", {1})};
+    }
   }
   // support for isa<> and dyn_cast.
   static bool classof(const RegAllocEvictionAdvisorAnalysis *R) {
@@ -535,7 +602,9 @@
 bool MLEvictAdvisor::loadInterferenceFeatures(
     const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
     const SmallVirtRegSet &FixedRegisters,
-    llvm::SmallVectorImpl<float> &Largest, size_t Pos) const {
+    llvm::SmallVectorImpl<float> &Largest, size_t Pos,
+    llvm::SmallVectorImpl<std::tuple<SlotIndex, SlotIndex, size_t>> &LRPosInfo)
+    const {
   // It is only possible to evict virtual register interference.
   if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) {
     // leave unavailable
@@ -594,7 +663,7 @@
   // OK, so if we made it this far, this LR is an eviction candidate, load its
   // features.
   extractFeatures(InterferingIntervals, Largest, Pos, IsHint, LocalIntfs,
-                  NrUrgent);
+                  NrUrgent, LRPosInfo);
   return true;
 }
 
@@ -638,6 +707,8 @@
   // reset all the features to 0) Use Pos to capture the column we load
   // features at - in AllocationOrder order.
   size_t Pos = 0;
+  SmallVector<std::tuple<SlotIndex, SlotIndex, size_t>, NumberOfInterferences>
+      LRPosInfo;
   for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E;
        ++I, ++Pos) {
     MCRegister PhysReg = *I;
@@ -647,7 +718,7 @@
       continue;
     }
     if (loadInterferenceFeatures(VirtReg, PhysReg, I.isHint(), FixedRegisters,
-                                 Largest, Pos)) {
+                                 Largest, Pos, LRPosInfo)) {
       ++Available;
       Regs[Pos] = std::make_pair(PhysReg, true);
     }
@@ -665,9 +736,12 @@
   extractFeatures(SmallVector<const LiveInterval *, 1>(1, &VirtReg), Largest,
                   CandidateVirtRegPos, /*IsHint*/ 0, /*LocalIntfsCount*/ 0,
-                  /*NrUrgent*/ 0.0);
+                  /*NrUrgent*/ 0.0, LRPosInfo);
   assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had "
                                "nothing to allocate initially.");
+  if (EnableDevelopmentFeatures) {
+    extractInstructionFeatures(LRPosInfo);
+  }
   // Normalize the features.
   for (auto &V : Largest)
     V = V ? V : 1.0;
@@ -752,7 +826,9 @@
 void MLEvictAdvisor::extractFeatures(
     const SmallVectorImpl<const LiveInterval *> &Intervals,
     llvm::SmallVectorImpl<float> &Largest, size_t Pos, int64_t IsHint,
-    int64_t LocalIntfsCount, float NrUrgent) const {
+    int64_t LocalIntfsCount, float NrUrgent,
+    SmallVectorImpl<std::tuple<SlotIndex, SlotIndex, size_t>> &LRPosInfo)
+    const {
   int64_t NrDefsAndUses = 0;
   int64_t NrBrokenHints = 0;
   double R = 0.0;
@@ -799,6 +875,11 @@
       HintWeights += LIFC.HintWeights;
       NrRematerializable += LIFC.IsRemat;
+
+      for (auto CurrentSegment : LI) {
+        LRPosInfo.push_back(
+            std::make_tuple(CurrentSegment.start, CurrentSegment.end, Pos));
+      }
     }
   size_t Size = 0;
   if (!Intervals.empty()) {
@@ -841,6 +922,77 @@
 #undef SET
 }
 
+void MLEvictAdvisor::extractInstructionFeatures(
+    SmallVectorImpl<std::tuple<SlotIndex, SlotIndex, size_t>> &LRPosInfo)
+    const {
+  std::sort(LRPosInfo.begin(), LRPosInfo.end(),
+            [](std::tuple<SlotIndex, SlotIndex, size_t> A,
+               std::tuple<SlotIndex, SlotIndex, size_t> B) {
+              return std::get<0>(A) < std::get<0>(B);
+            });
+  size_t InstructionCount = 0;
+  size_t CurrentSegment = 0;
+  SlotIndex CurrentIndex = std::get<0>(LRPosInfo[0]);
+  while (true) {
+    while (CurrentIndex <= std::get<1>(LRPosInfo[CurrentSegment]) &&
+           InstructionCount < ModelMaxSupportedInstructionCount) {
+      // set instruction
+      auto *CurrentMachineInstruction =
+          LIS->getInstructionFromIndex(CurrentIndex);
+      if (CurrentMachineInstruction == nullptr) {
+        CurrentIndex = CurrentIndex.getNextIndex();
+        continue;
+      }
+      auto CurrentOpcode = CurrentMachineInstruction->getOpcode();
+      Runner->getTensor<int64_t>(FeatureIDs::instructions -
+                                 1)[InstructionCount] =
+          CurrentOpcode < OpcodeCountCutoff ? CurrentOpcode : 0;
+      // set mask for instruction
+      // add 1 to the resulting position as all of the segment indices are
+      // offset 1 as the first row is instruction opcodes
+      auto CurrentSegmentPosition = std::get<2>(LRPosInfo[CurrentSegment]);
+      Runner->getTensor<int64_t>(
+          FeatureIDs::instructions_mapping -
+          1)[CurrentSegmentPosition * ModelMaxSupportedInstructionCount +
+             InstructionCount] = 1;
+      // handle the overlapping LR case
+      size_t OverlapCheckCurrentSegment = CurrentSegment + 1;
+      while (OverlapCheckCurrentSegment < LRPosInfo.size()) {
+        if (std::get<0>(LRPosInfo[OverlapCheckCurrentSegment]) <=
+            CurrentIndex) {
+          auto OverlapCurrentSegmentPosition =
+              std::get<2>(LRPosInfo[OverlapCheckCurrentSegment]);
+          Runner->getTensor<int64_t>(FeatureIDs::instructions_mapping -
+                                     1)[OverlapCurrentSegmentPosition *
+                                            ModelMaxSupportedInstructionCount +
+                                        InstructionCount] = 1;
+        } else {
+          break;
+        }
+        ++OverlapCheckCurrentSegment;
+      }
+      ++InstructionCount;
+      CurrentIndex = CurrentIndex.getNextIndex();
+    }
+    // if we've just finished processing through the last segment or if we've
+    // hit the maximum number of instructions, break out of the loop.
+    if (CurrentSegment == LRPosInfo.size() - 1 ||
+        InstructionCount >= ModelMaxSupportedInstructionCount) {
+      break;
+    }
+    // just finished processing the previous segment, transition to the next one
+    if (std::get<0>(LRPosInfo[CurrentSegment + 1]) <=
+        std::get<1>(LRPosInfo[CurrentSegment])) {
+      // segments are overlapping.
+      ++CurrentSegment;
+    } else {
+      // segments are not overlapping.
+      CurrentIndex = std::get<0>(LRPosInfo[CurrentSegment + 1]);
+      ++CurrentSegment;
+    }
+  }
+}
+
 // Development mode-specific implementations
 #ifdef LLVM_HAVE_TF_API
 RegAllocEvictionAdvisorAnalysis *llvm::createDevelopmentModeAdvisor() {
@@ -877,6 +1029,15 @@
           CurrentFeature, reinterpret_cast<const char *>(
                               getRunner().getTensorUntyped(CurrentFeature)));
     }
+    if (EnableDevelopmentFeatures) {
+      // Skip FeatureIDs::FeatureCount
+      for (; CurrentFeature < FeatureIDs::FeaturesWithDevelopmentCount - 1;
+           ++CurrentFeature) {
+        Log->logSpecifiedTensorValue(
+            CurrentFeature, reinterpret_cast<const char *>(
+                                getRunner().getTensorUntyped(CurrentFeature)));
+      }
+    }
     if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(&getRunner()))
       for (size_t I = 1; I < MUTR->outputLoggedFeatureSpecs().size();
            ++I, ++CurrentFeature)
diff --git a/llvm/test/CodeGen/MLRegalloc/dev-mode-extra-features-logging.ll b/llvm/test/CodeGen/MLRegalloc/dev-mode-extra-features-logging.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/MLRegalloc/dev-mode-extra-features-logging.ll
@@ -0,0 +1,51 @@
+; REQUIRES: have_tf_api
+; REQUIRES: x86_64-linux
+;
+; Check that we log the currently in development features correctly with both
+; the default case and with a learned policy.
+;
+; RUN: llc -mtriple=x86_64-linux-unknown -regalloc=greedy -regalloc-enable-advisor=development \
+; RUN:   -regalloc-training-log=%t1 -tfutils-text-log \
+; RUN:   -regalloc-enable-development-features < %S/Inputs/input.ll
+; RUN: sed -i 's/ \+/ /g' %t1
+; RUN: sed -i 's/\\n key:/\n key:/g' %t1
+; RUN: sed -i 's/\\n feature/\n feature/g' %t1
+; RUN: sed -i 's/\\n/ /g' %t1
+; RUN: FileCheck --input-file %t1 %s
+
+; RUN: rm -rf %t && mkdir %t
+; RUN: %python %S/../../../lib/Analysis/models/gen-regalloc-eviction-test-model.py %t
+; RUN: llc -mtriple=x86_64-linux-unknown -regalloc=greedy -regalloc-enable-advisor=development \
+; RUN:   -regalloc-training-log=%t2 -tfutils-text-log -regalloc-model=%t \
+; RUN:   -regalloc-enable-development-features < %S/Inputs/input.ll
+; RUN: sed -i 's/ \+/ /g' %t2
+; RUN: sed -i 's/\\n key:/\n key:/g' %t2
+; RUN: sed -i 's/\\n feature/\n feature/g' %t2
+; RUN: sed -i 's/\\n/ /g' %t2
+; RUN: FileCheck --input-file %t2 %s
+
+; CHECK-NOT: nan
+; CHECK-LABEL: key: \"instructions\"
+; Check the first five opcodes in the first eviction problem
+; CHECK-NEXT: value: 19
+; CHECK-SAME: value: 19
+; CHECK-SAME: value: 3030
+; CHECK-SAME: value: 1245
+; CHECK-SAME: value: 1264
+; The first eviction problem is significantly less than 300 instructions. Check
+; that there is a zero value
+; CHECK-SAME: value: 0
+; Only the candidate virtreg and the 10th LR are included in this problem. Make
+; sure the other LRs have values of zero.
+; CHECK-LABEL: key: \"instructions_mapping\"
+; CHECK-COUNT-2700: value: 0
+; CHECK-SAME: value: 1
+; Indexing 300 back from where the candidate vr actual resides due to the fact
+; that not all the values between the 10th LR and the candidate are zero.
+; CHECK-COUNT-6600: value: 0
+; CHECK-SAME: value: 1
+; Ensure that we can still go through the mapping matrices for the rest of the
+; eviction problems to make sure we haven't hit the end of the matrix above.
+; There are a total of 23 eviction problems with this test.
+; CHECK-COUNT-22: int64_list
+; CHECK: key: \"is_free\"
\ No newline at end of file