diff --git a/llvm/lib/Analysis/TFUtils.cpp b/llvm/lib/Analysis/TFUtils.cpp --- a/llvm/lib/Analysis/TFUtils.cpp +++ b/llvm/lib/Analysis/TFUtils.cpp @@ -482,10 +482,10 @@ for (size_t I = 0; I < Spec.getElementCount(); ++I) (reinterpret_cast(Buff))[I] = static_cast((reinterpret_cast(RawData))[I]); - else if (Spec.isElementType() || Spec.isElementType()) + else if (Spec.isElementType() || Spec.isElementType()) { std::memcpy(Buff, RawData, Spec.getElementCount() * Spec.getElementByteSize()); - else + } else llvm_unreachable("Unsupported tensor type"); } diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp --- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp +++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp @@ -63,6 +63,13 @@ "regalloc-model", cl::Hidden, cl::desc("The model being trained for register allocation eviction")); +static cl::opt EnableFeaturesUnderDevelopment( + "enable-features-under-development", cl::Hidden, + cl::desc("Whether or not to enable extracting and passing features to the " + "model that are currently under development")); + +#else +static const bool EnableFeaturesUnderDevelopment = false; #endif // #ifdef LLVM_HAVE_TF_API extern cl::opt EvictInterferenceCutoff; @@ -192,25 +199,36 @@ "lowest stage of an interval in this LR") \ M(float, progress, {1}, "ratio of current queue size to initial size") -// The model learns to pick one of the mask == 1 interferences. This is the name -// of the output tensor. -// The contract with the model is that the output will be guaranteed to be to a -// mask == 1 position. -// Using a macro here to avoid 'not used' warnings (and keep cond compilation to -// a minimum) +// features that are currently being used for development purposes and not being +// used in any shipped model. Only enabled when LLVM is compiled in MLGO +// development mode. 
+#ifdef LLVM_HAVE_TF_API +#define DEVELOPMENT_RA_EVICT_FEATURES_LIST(M) \ + M(int64_t, dummy_feature, PerLiveRangeShape, \ + "A dummy variable for testing the release/development feature split") +#else +#define DEVELOPMENT_RA_EVICT_FEATURES_LIST(M) +#endif // LLVM_HAVE_TF_API + +// The model learns to pick one of the mask == 1 interferences. This is the +// name of the output tensor. The contract with the model is that the output +// will be guaranteed to be to a mask == 1 position. Using a macro here to +// avoid 'not used' warnings (and keep cond compilation to a minimum) #define DecisionName "index_to_evict" // Named features index. enum FeatureIDs { #define _FEATURE_IDX(_, name, __, ___) name, - RA_EVICT_FEATURES_LIST(_FEATURE_IDX) + RA_EVICT_FEATURES_LIST(_FEATURE_IDX) FeatureCount, + DEVELOPMENT_RA_EVICT_FEATURES_LIST(_FEATURE_IDX) + FeatureCountWithDevelopmentFeatures #undef _FEATURE_IDX - FeatureCount }; // The ML advisor will typically have a sparse input to the evaluator, because // various phys regs won't be available. It's easier (maintenance-wise) to -// bulk-reset the state of the evaluator each time we are about to use it again. +// bulk-reset the state of the evaluator each time we are about to use it +// again. template size_t getTotalSize(const std::vector &Shape) { size_t Ret = sizeof(T); for (const auto V : Shape) @@ -224,10 +242,20 @@ getTotalSize(SHAPE)); RA_EVICT_FEATURES_LIST(_RESET) #undef _RESET + if (EnableFeaturesUnderDevelopment) { +// For the development feature case, we need to reset one less than the index as +// all of the development features appear after the FeatureCount which takes up +// one slot. +#define _DEV_RESET(TYPE, NAME, SHAPE, __) \ + std::memset(Runner.getTensorUntyped(FeatureIDs::NAME - 1), 0, \ + getTotalSize(SHAPE)); + DEVELOPMENT_RA_EVICT_FEATURES_LIST(_DEV_RESET) +#undef _DEV_RESET + } } -// Per-live interval components that get aggregated into the feature values that -// will be passed to the evaluator. 
+// Per-live interval components that get aggregated into the feature values +// that will be passed to the evaluator. struct LIFeatureComponents { double R = 0; double W = 0; @@ -241,7 +269,8 @@ using CandidateRegList = std::array, NumberOfInterferences>; -using FeaturesListNormalizer = std::array; +using FeaturesListNormalizer = + llvm::SmallVector; /// The ML evictor (commonalities between release and development mode) class MLEvictAdvisor : public RegAllocEvictionAdvisor { @@ -259,10 +288,10 @@ // error, and we shouldn't be asking for it here. const MLModelRunner &getRunner() const { return *Runner; } - /// This just calls Evaluate on the Runner, but in the development mode case, - /// if we're just capturing the log of the default advisor, it needs to call - /// the latter instead, so we need to pass all the necessary parameters for - /// it. In the development case, it will also log. + /// This just calls Evaluate on the Runner, but in the development mode + /// case, if we're just capturing the log of the default advisor, it needs + /// to call the latter instead, so we need to pass all the necessary + /// parameters for it. In the development case, it will also log. virtual int64_t tryFindEvictionCandidatePosition(const LiveInterval &VirtReg, const AllocationOrder &Order, @@ -271,11 +300,11 @@ /// Load the features of the given VirtReg (allocated or not) at column Pos, /// but if that can't be evicted, return false instead. 
- bool - loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg, - bool IsHint, const SmallVirtRegSet &FixedRegisters, - std::array &Largest, - size_t Pos) const; + bool loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg, + bool IsHint, + const SmallVirtRegSet &FixedRegisters, + llvm::SmallVectorImpl &Largest, + size_t Pos) const; private: static float getInitialQueueSize(const MachineFunction &MF); @@ -286,11 +315,12 @@ const SmallVirtRegSet &FixedRegisters) const override; void extractFeatures(const SmallVectorImpl &Intervals, - std::array &Largest, - size_t Pos, int64_t IsHint, int64_t LocalIntfsCount, + llvm::SmallVectorImpl &Largest, size_t Pos, + int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent) const; - // Point-in-time: we didn't learn this, so we always delegate to the default. + // Point-in-time: we didn't learn this, so we always delegate to the + // default. bool canEvictHintInterference( const LiveInterval &VirtReg, MCRegister PhysReg, const SmallVirtRegSet &FixedRegisters) const override { @@ -302,9 +332,9 @@ getLIFeatureComponents(const LiveInterval &LI) const; // Hold on to a default advisor for: - // 1) the implementation of canEvictHintInterference, because we didn't learn - // that nuance yet; - // 2) for bootstrapping (logging) in the development mode case. + // 1) the implementation of canEvictHintInterference, because we didn't + // learn that nuance yet; 2) for bootstrapping (logging) in the development + // mode case. const DefaultEvictionAdvisor DefaultAdvisor; MLModelRunner *const Runner; const MachineBlockFrequencyInfo &MBFI; @@ -312,7 +342,7 @@ // Indices of those features we don't want to normalize. // This could be static and shared, but its initialization is non-trivial. 
- std::bitset DoNotNormalize; + std::bitset DoNotNormalize; const float InitialQSize; using RegID = unsigned; @@ -322,10 +352,6 @@ #define _DECL_FEATURES(type, name, shape, _) \ TensorSpec::createSpec(#name, shape), -static const std::vector InputFeatures{ - {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}, -}; -#undef _DECL_FEATURES // =================================== // Release (AOT) - specifics // =================================== @@ -333,13 +359,22 @@ : public RegAllocEvictionAdvisorAnalysis { public: ReleaseModeEvictionAdvisorAnalysis() - : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Release) {} + : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Release) { + if (EnableFeaturesUnderDevelopment) { + InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES) + DEVELOPMENT_RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; + } else { + InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; + } + } // support for isa<> and dyn_cast. static bool classof(const RegAllocEvictionAdvisorAnalysis *R) { return R->getAdvisorMode() == AdvisorMode::Release; } private: + static std::vector InputFeatures; + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); @@ -358,6 +393,8 @@ std::unique_ptr> Runner; }; +std::vector ReleaseModeEvictionAdvisorAnalysis::InputFeatures; + // =================================== // Development mode-specifics // =================================== @@ -369,19 +406,12 @@ static const TensorSpec Reward = TensorSpec::createSpec("reward", {1}); // Features we bind on the model. The tensor names have a prefix, and we also -// need to include some tensors that are expected to be present by the training -// algo. +// need to include some tensors that are expected to be present by the +// training algo. // TODO: can we just get rid of these? 
#define _DECL_TRAIN_FEATURES(type, name, shape, _) \ TensorSpec::createSpec(std::string("action_") + #name, shape), -static const std::vector TrainingInputFeatures{ - {RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) - TensorSpec::createSpec("action_discount", {1}), - TensorSpec::createSpec("action_step_type", {1}), - TensorSpec::createSpec("action_reward", {1})}}; -#undef _DECL_TRAIN_FEATURES - class DevelopmentModeEvictAdvisor : public MLEvictAdvisor { public: DevelopmentModeEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA, @@ -403,7 +433,25 @@ : public RegAllocEvictionAdvisorAnalysis { public: DevelopmentModeEvictionAdvisorAnalysis() - : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Development) {} + : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Development) { + if (EnableFeaturesUnderDevelopment) { + InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES) + DEVELOPMENT_RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; + TrainingInputFeatures = { + RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) + DEVELOPMENT_RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) + TensorSpec::createSpec("action_discount", {1}), + TensorSpec::createSpec("action_step_type", {1}), + TensorSpec::createSpec("action_reward", {1})}; + } else { + InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; + TrainingInputFeatures = { + RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) + TensorSpec::createSpec("action_discount", {1}), + TensorSpec::createSpec("action_step_type", {1}), + TensorSpec::createSpec("action_reward", {1})}; + } + } // support for isa<> and dyn_cast. 
static bool classof(const RegAllocEvictionAdvisorAnalysis *R) { return R->getAdvisorMode() == AdvisorMode::Development; @@ -419,6 +467,9 @@ } private: + static std::vector InputFeatures; + static std::vector TrainingInputFeatures; + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); @@ -485,6 +536,11 @@ std::unique_ptr Runner; StringMap> LogMap; }; + +std::vector DevelopmentModeEvictionAdvisorAnalysis::InputFeatures; +std::vector + DevelopmentModeEvictionAdvisorAnalysis::TrainingInputFeatures; + #endif //#ifdef LLVM_HAVE_TF_API } // namespace @@ -515,6 +571,9 @@ DoNotNormalize.set(FeatureIDs::min_stage); DoNotNormalize.set(FeatureIDs::max_stage); DoNotNormalize.set(FeatureIDs::progress); + /*if(EnableFeaturesUnderDevelopment) { + DoNotNormalize.set(FeatureIDs::dummy_feature); + }*/ } int64_t MLEvictAdvisor::tryFindEvictionCandidatePosition( @@ -528,8 +587,8 @@ bool MLEvictAdvisor::loadInterferenceFeatures( const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, - const SmallVirtRegSet &FixedRegisters, FeaturesListNormalizer &Largest, - size_t Pos) const { + const SmallVirtRegSet &FixedRegisters, + llvm::SmallVectorImpl &Largest, size_t Pos) const { // It is only possible to evict virtual register interference. if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) { // leave unavailable @@ -546,8 +605,8 @@ SmallVector InterferingIntervals; for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); - // Different from the default heuristic, we don't make any assumptions about - // what having more than 10 results in the query may mean. + // Different from the default heuristic, we don't make any assumptions + // about what having more than 10 results in the query may mean. 
const auto &IFIntervals = Q.interferingVRegs(EvictInterferenceCutoff); if (IFIntervals.empty() && InterferingIntervals.empty()) continue; @@ -604,14 +663,14 @@ // max, then any of the costs of the legally-evictable intervals // would be lower. When that happens, one of those will be selected. // Therefore, we allow the candidate be selected, unless the candidate is - // unspillable, in which case it would be incorrect to not find a register for - // it. + // unspillable, in which case it would be incorrect to not find a register + // for it. const bool MustFindEviction = (!VirtReg.isSpillable() && CostPerUseLimit == static_cast(~0u)); // Number of available candidates - if 0, no need to continue. size_t Available = 0; - // Make sure we don't have leftover partial state from an attempt where we had - // no available candidates and bailed out early. + // Make sure we don't have leftover partial state from an attempt where we + // had no available candidates and bailed out early. resetInputs(*Runner); // Track the index->register mapping because AllocationOrder doesn't do that @@ -624,15 +683,16 @@ // only normalize (some of) the float features, but it's just simpler to // dimension 'Largest' to all the features, especially since we have the // 'DoNotNormalize' list. - FeaturesListNormalizer Largest; - Largest.fill(0.0); - - // Same overal idea as in the default eviction policy - we visit the values of - // AllocationOrder one at a time. If it's not legally available, we mask off - // the corresponding feature column (==do nothing because we already reset all - // the features to 0) - // Use Pos to capture the column we load features at - in AllocationOrder - // order. + int FeatureCount = EnableFeaturesUnderDevelopment + ? 
FeatureIDs::FeatureCountWithDevelopmentFeatures + : FeatureIDs::FeatureCount; + FeaturesListNormalizer Largest(FeatureCount, 0.0); + + // Same overall idea as in the default eviction policy - we visit the values + // of AllocationOrder one at a time. If it's not legally available, we mask + // off the corresponding feature column (==do nothing because we already + // reset all the features to 0). Use Pos to capture the column we load + // features at - in AllocationOrder order. size_t Pos = 0; for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E; ++I, ++Pos) { @@ -659,7 +719,8 @@ Regs[CandidateVirtRegPos].second = !MustFindEviction; if (!MustFindEviction) extractFeatures(SmallVector(1, &VirtReg), Largest, - CandidateVirtRegPos, /*IsHint*/ 0, /*LocalIntfsCount*/ 0, + CandidateVirtRegPos, /*IsHint*/ 0, + /*LocalIntfsCount*/ 0, /*NrUrgent*/ 0.0); assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had " "nothing to allocate initially."); @@ -746,8 +807,8 @@ // of accummulating the various features, we keep them separate. void MLEvictAdvisor::extractFeatures( const SmallVectorImpl &Intervals, - std::array &Largest, size_t Pos, - int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent) const { + llvm::SmallVectorImpl &Largest, size_t Pos, int64_t IsHint, + int64_t LocalIntfsCount, float NrUrgent) const { int64_t NrDefsAndUses = 0; int64_t NrBrokenHints = 0; double R = 0.0; @@ -853,9 +914,9 @@ } else { MCRegister PhysReg = getDefaultAdvisor().tryFindEvictionCandidate( VirtReg, Order, CostPerUseLimit, FixedRegisters); - // Find the index of the selected PhysReg. We need it for logging, otherwise - // this is wasted cycles (but so would starting development mode without a - // model nor logging) + // Find the index of the selected PhysReg. 
We need it for logging, + // otherwise this is wasted cycles (but so would starting development mode + // without a model nor logging) if (!PhysReg) Ret = CandidateVirtRegPos; else @@ -872,6 +933,19 @@ CurrentFeature, reinterpret_cast( getRunner().getTensorUntyped(CurrentFeature))); } + if (EnableFeaturesUnderDevelopment) { + // skip FeatureIDs::FeatureCount + ++CurrentFeature; + for (; CurrentFeature < FeatureIDs::FeatureCountWithDevelopmentFeatures; + ++CurrentFeature) { + Log->logSpecifiedTensorValue( + CurrentFeature - 1, + reinterpret_cast( + getRunner().getTensorUntyped(CurrentFeature - 1))); + } + // reset indexing for next steps + CurrentFeature -= 1; + } if (auto *MUTR = dyn_cast(&getRunner())) for (size_t I = 1; I < MUTR->outputLoggedFeatureSpecs().size(); ++I, ++CurrentFeature)