diff --git a/llvm/include/llvm/Analysis/MLModelRunner.h b/llvm/include/llvm/Analysis/MLModelRunner.h
--- a/llvm/include/llvm/Analysis/MLModelRunner.h
+++ b/llvm/include/llvm/Analysis/MLModelRunner.h
@@ -41,14 +41,15 @@
         getTensorUntyped(static_cast<size_t>(FeatureID)));
   }
 
-protected:
-  MLModelRunner(LLVMContext &Ctx) : Ctx(Ctx) {}
-  virtual void *evaluateUntyped() = 0;
   virtual void *getTensorUntyped(size_t Index) = 0;
   const void *getTensorUntyped(size_t Index) const {
     return (const_cast<MLModelRunner *>(this))->getTensorUntyped(Index);
   }
 
+protected:
+  MLModelRunner(LLVMContext &Ctx) : Ctx(Ctx) {}
+  virtual void *evaluateUntyped() = 0;
+
   LLVMContext &Ctx;
 };
 } // namespace llvm
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -113,6 +113,7 @@
   MIRFSDiscriminator.cpp
   MIRSampleProfile.cpp
   MIRYamlMapping.cpp
+  MLRegallocEvictAdvisor.cpp
   ModuloSchedule.cpp
   MultiHazardRecognizer.cpp
   PatchableFunction.cpp
diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
new file
--- /dev/null
+++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
@@ -0,0 +1,170 @@
+//===- MLRegAllocEvictAdvisor.cpp - ML eviction advisor -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the ML eviction advisor and reward injection pass
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+
+#if defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)
+
+#include "RegAllocEvictionAdvisor.h"
+#include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/Analysis/ModelUnderTrainingRunner.h"
+#include "llvm/Analysis/NoInferenceModelRunner.h"
+#include "llvm/Analysis/Utils/TFUtils.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <cstring>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ml-regalloc"
+
+namespace {
+// This is the maximum number of interfering ranges. That's the number of
+// distinct AllocationOrder values, which comes from MCRegisterClass::RegsSize.
+// For X86, that's 32.
+// TODO: find a way to get this, statically, in a programmatic way.
+static const int64_t MaxInterferences = 32;
+
+// Logically, we can think of the feature set given to the evaluator as a 2D
+// matrix. The rows are the features (see next). The columns correspond to the
+// interferences. We treat the candidate virt reg as an 'interference', too, as
+// its feature set is the same as that of the interfering ranges. So we'll have
+// MaxInterferences + 1 columns and by convention, we will use the last column
+// for the virt reg seeking allocation.
+static const int64_t CandidateVirtRegPos = MaxInterferences;
+static const int64_t NumberOfInterferences = CandidateVirtRegPos + 1;
+
+// Most features are as described above, so we'll reuse this vector in defining
+// them.
+static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences};
+
+// --------------
+// Features table
+// --------------
+// For each interfering live range (incl. the candidate) we collect a number of
+// features. However, because the features are of different types (and because
+// of ML best practices), we organize the tensors per feature, not per
+// candidate. Each such tensor has a scalar value corresponding to the
+// interfering live range at that position, in the order in AllocationOrder.
+// The last position corresponds to the virt reg seeking allocation.
+// Exception to all that is the progression feature, which is just a scalar (see
+// its documentation for details).
+// Note on naming: the "_by_max" are normalized using the largest value of that
+// tensor, as observed in the current decision making stage (i.e. for the
+// current call to the advisor's tryFindEvictionCandidate)
+//
+// The feature list format: type, name, shape, documentation.
+// Note: we can really just use int64 and float, hence the modeling of some
+// bools as int64 values.
+#define RA_EVICT_FEATURES_LIST(M) \
+  M(int64_t, mask, PerLiveRangeShape, \
+    "boolean values, 0 for unavailable candidates (i.e. if a position is 0, " \
+    "it " \
+    "can't be evicted)") \
+  M(int64_t, is_free, PerLiveRangeShape, \
+    "boolean values, 1 if this phys reg is actually free (no interferences)") \
+  M(float, nr_broken_hints, PerLiveRangeShape, \
+    "if this position were evicted, how many broken hints would there be") \
+  M(int64_t, is_hint, PerLiveRangeShape, \
+    "is this a preferred phys reg for the candidate") \
+  M(int64_t, is_local, PerLiveRangeShape, \
+    "is this live range local to a basic block") \
+  M(float, nr_rematerializable, PerLiveRangeShape, \
+    "nr rematerializable ranges") \
+  M(float, nr_defs_and_uses, PerLiveRangeShape, \
+    "bb freq - weighed nr defs and uses") \
+  M(float, weighed_reads_by_max, PerLiveRangeShape, \
+    "bb freq - weighed nr of reads, normalized") \
+  M(float, weighed_writes_by_max, PerLiveRangeShape, \
+    "bb freq - weighed nr of writes, normalized") \
+  M(float, weighed_read_writes_by_max, PerLiveRangeShape, \
+    "bb freq - weighed nr of uses that are both read and writes, normalized") \
+  M(float, weighed_indvars_by_max, PerLiveRangeShape, \
+    "bb freq - weighed nr of uses that are indvars, normalized") \
+  M(float, hint_weights_by_max, PerLiveRangeShape, \
+    "bb freq - weighed nr of uses that are hints, normalized") \
+  M(float, start_bb_freq_by_max, PerLiveRangeShape, \
+    "the freq in the start block, normalized") \
+  M(float, end_bb_freq_by_max, PerLiveRangeShape, \
+    "freq of end block, normalized") \
+  M(float, hottest_bb_freq_by_max, PerLiveRangeShape, \
+    "hottest BB freq, normalized") \
+  M(float, liverange_size, PerLiveRangeShape, \
+    "size (instr index diff) of the LR") \
+  M(float, use_def_density, PerLiveRangeShape, \
+    "the max weight, as computed by the manual heuristic") \
+  M(int64_t, max_stage, PerLiveRangeShape, \
+    "largest stage of an interval in this LR") \
+  M(int64_t, min_stage, PerLiveRangeShape, \
+    "lowest stage of an interval in this LR") \
+  M(float, progress, {1}, "ratio of current queue size to initial size")
+
+// The model learns to pick one of the mask == 1 interferences. This is the name
+// of the output tensor.
+// The contract with the model is that the output will be guaranteed to be to a
+// mask == 1 position.
+const char *const DecisionName = "index_to_evict";
+
+// Named features index.
+enum FeatureIDs {
+#define _FEATURE_IDX(_, name, __, ___) name,
+  RA_EVICT_FEATURES_LIST(_FEATURE_IDX)
+#undef _FEATURE_IDX
+  FeatureCount
+};
+
+// The ML advisor will typically have a sparse input to the evaluator, because
+// various phys regs won't be available. It's easier (maintenance-wise) to
+// bulk-reset the state of the evaluator each time we are about to use it again.
+template <typename T> size_t getTotalSize(const std::vector<int64_t> &Shape) {
+  size_t Ret = sizeof(T);
+  for (const auto V : Shape)
+    Ret *= V;
+  return Ret;
+}
+
+void resetInputs(MLModelRunner &Runner) {
+#define _RESET(TYPE, NAME, SHAPE, __) \
+  std::memset(Runner.getTensorUntyped(FeatureIDs::NAME), 0, \
+              getTotalSize<TYPE>(SHAPE));
+  RA_EVICT_FEATURES_LIST(_RESET)
+#undef _RESET
+}
+
+// Development mode-specifics
+#ifdef LLVM_HAVE_TF_API
+#define _DECL_FEATURES(type, name, shape, _) \
+  TensorSpec::createSpec<type>(#name, shape),
+
+static const std::vector<TensorSpec> InputFeatures{
+    {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}};
+#undef _DECL_FEATURES
+static const TensorSpec Output =
+    TensorSpec::createSpec<int64_t>(DecisionName, {1});
+const char *const RewardName = "reward";
+static const TensorSpec Reward =
+    TensorSpec::createSpec<float>(RewardName, {1});
+
+#endif //#ifdef LLVM_HAVE_TF_API
+} // namespace
+#endif // defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)
\ No newline at end of file