diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h --- a/llvm/include/llvm/Analysis/InlineCost.h +++ b/llvm/include/llvm/Analysis/InlineCost.h @@ -54,6 +54,45 @@ const uint64_t MaxSimplifiedDynamicAllocaToInline = 65536; } // namespace InlineConstants +// The features which determine the computation of InlineCost. +#define INLINE_COST_FEATURE_ITERATOR(M) \ + M(SROASavings, "sroa_savings") \ + M(SROALosses, "sroa_losses") \ + M(LoadElimination, "load_elimination") \ + M(CallPenalty, "call_penalty") \ + M(CallArgumentSetup, "call_argument_setup") \ + M(LoadRelativeIntrinsic, "load_relative_intrinsic") \ + M(LoweredCallArgSetup, "lowered_call_arg_setup") \ + M(IndirectCallPenalty, "indirect_call_penalty") \ + M(JumpTablePenalty, "jump_table_penalty") \ + M(CaseClusterPenalty, "case_cluster_penalty") \ + M(SwitchPenalty, "switch_penalty") \ + M(UnsimplifiedCommonInstructions, "unsimplified_common_instructions") \ + M(NumLoops, "num_loops") \ + M(DeadBlocks, "dead_blocks") \ + M(SimplifiedInstructions, "simplified_instructions") \ + M(ConstantArgs, "constant_args") \ + M(ConstantOffsetPtrArgs, "constant_offset_ptr_args") \ + M(CallSiteCost, "callsite_cost") \ + M(ColdCcPenalty, "cold_cc_penalty") \ + M(LastCallToStaticBonus, "last_call_to_static_bonus") \ + M(IsMultipleBlocks, "is_multiple_blocks") + +// clang-format off +enum class InlineCostFeatures : size_t { +#define POPULATE_INDICES(INDEX_NAME, NAME) INDEX_NAME, + INLINE_COST_FEATURE_ITERATOR(POPULATE_INDICES) +#undef POPULATE_INDICES + + NumberOfFeatures +}; +// clang-format on + +using InlineCostFeaturesArray = + std::array(InlineCostFeatures::NumberOfFeatures)>; + +int reduceCostFeatures(const InlineCostFeaturesArray &Features); + /// Represents the cost of inlining a function. /// /// This supports special values for functions which should "always" or @@ -263,7 +302,7 @@ /// returns: /// - None, if the inlining cannot happen (is illegal) /// - an integer, representing the cost. -Optional getInliningCostEstimate( +Optional getInliningCostFeatures( CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref GetAssumptionCache, function_ref GetBFI = nullptr, diff --git a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h --- a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h +++ b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h @@ -14,6 +14,8 @@ #include #include +#include "llvm/Analysis/InlineCost.h" + namespace llvm { // List of features. Each feature is defined through a triple: @@ -33,7 +35,6 @@ "total current number of defined functions in the module") \ M(NrCtantParams, "nr_ctant_params", \ "number of parameters in the call site that are constants") \ - M(CostEstimate, "cost_estimate", "total cost estimate (threshold - free)") \ M(EdgeCount, "edge_count", "total number of calls in the module") \ M(CallerUsers, "caller_users", \ "number of module-internal users of the caller, +1 if the caller is " \ @@ -46,14 +47,29 @@ "number of blocks reached from a conditional instruction, in the callee") \ M(CalleeUsers, "callee_users", \ "number of module-internal users of the callee, +1 if the callee is " \ - "exposed externally") + "exposed externally") \ + M(CostEstimate, "cost_estimate", "total cost estimate (threshold - free)") +// clang-format off enum class FeatureIndex : size_t { +// InlineCost features - these must come first +#define POPULATE_INDICES(INDEX_NAME, NAME) INDEX_NAME, + INLINE_COST_FEATURE_ITERATOR(POPULATE_INDICES) +#undef POPULATE_INDICES + +// Non-cost features #define POPULATE_INDICES(INDEX_NAME, NAME, COMMENT) INDEX_NAME, INLINE_FEATURE_ITERATOR(POPULATE_INDICES) #undef POPULATE_INDICES - NumberOfFeatures + + NumberOfFeatures }; +// clang-format on + +constexpr FeatureIndex +inlineCostFeatureToMlFeature(InlineCostFeatures Feature) { + return static_cast(static_cast(Feature)); +} constexpr size_t NumberOfFeatures = static_cast(FeatureIndex::NumberOfFeatures); diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -129,7 +129,8 @@ cl::desc("Disables evaluation of GetElementPtr with constant operands")); namespace { -class InlineCostCallAnalyzer; + +template class InlineCostCallAnalyzer; // This struct is used to store information about inline cost of a // particular instruction @@ -146,12 +147,14 @@ bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; } }; +template class InlineCostAnnotationWriter : public AssemblyAnnotationWriter { private: - InlineCostCallAnalyzer *const ICCA; + InlineCostCallAnalyzer *const ICCA; public: - InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {} + InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) + : ICCA(ICCA) {} virtual void emitInstructionAnnot(const Instruction *I, formatted_raw_ostream &OS) override; }; @@ -410,19 +413,18 @@ bool visitUnreachableInst(UnreachableInst &I); public: - CallAnalyzer( - Function &Callee, CallBase &Call, const TargetTransformInfo &TTI, - function_ref GetAssumptionCache, - function_ref GetBFI = nullptr, - ProfileSummaryInfo *PSI = nullptr, - OptimizationRemarkEmitter *ORE = nullptr) + CallAnalyzer(Function &Callee, CallBase &Call, const TargetTransformInfo &TTI, + function_ref GetAssumptionCache, + function_ref GetBFI = nullptr, + ProfileSummaryInfo *PSI = nullptr, + OptimizationRemarkEmitter *ORE = nullptr) : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI), PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE), CandidateCall(Call), EnableLoadElimination(true) {} InlineResult analyze(); - Optional getSimplifiedValue(Instruction *I) { + Optional getSimplifiedValue(Instruction *I) { if (SimplifiedValues.find(I) != SimplifiedValues.end()) return SimplifiedValues[I]; return None; @@ -440,8 +442,92 @@ void dump(); }; +bool isExclusiveMLFeature(InlineCostFeatures Feature) { + return Feature == InlineCostFeatures::SROASavings || + Feature == InlineCostFeatures::IsMultipleBlocks || + Feature == InlineCostFeatures::DeadBlocks || + Feature == InlineCostFeatures::SimplifiedInstructions || + Feature == InlineCostFeatures::ConstantArgs || + Feature == InlineCostFeatures::ConstantOffsetPtrArgs; +} + +class InlineCostValueUpdater { +public: + using CostType = int; + + bool shouldStop(int64_t Threshold) const { return Cost >= Threshold; } + + void addUnchecked(InlineCostFeatures Feature, int CostDelta) { + Cost += CostDelta; + } + + void add(InlineCostFeatures Feature, int64_t CostDelta, + int64_t UpperBound = std::numeric_limits::max()) { + assert(UpperBound > 0 && UpperBound <= std::numeric_limits::max() && + "invalid upper bound"); + + if (isExclusiveMLFeature(Feature)) { + return; + } + + Cost = std::min(UpperBound, Cost + CostDelta); + } + + void accumulateBonus(int OtherCost, int Threshold) { + Cost -= std::max(0, Threshold - OtherCost); + } + + // These all are equivalent in this case, but differ for the + // InlineCostFeaturesUpdater + int reduce() const { return Cost; } + int get() const { return Cost; } + +private: + int Cost = 0; +}; + +class InlineCostFeaturesUpdater { +public: + using CostType = InlineCostFeaturesArray; + + // Never stop, because we want to collect all features + bool shouldStop(int64_t Threshold) const { + (void)Threshold; + return false; + } + + void addUnchecked(InlineCostFeatures Feature, int CostDelta) { + auto FeatureIdx = static_cast(Feature); + Cost[FeatureIdx] += CostDelta; + } + + void add(InlineCostFeatures Feature, int64_t CostDelta, + int64_t UpperBound = std::numeric_limits::max()) { + assert(UpperBound > 0 && UpperBound <= std::numeric_limits::max() && + "invalid upper bound"); + auto FeatureIdx = static_cast(Feature); + Cost[FeatureIdx] = + std::min(UpperBound, Cost[FeatureIdx] + CostDelta); + } + + void accumulateBonus(const CostType &OtherFeatures, int Threshold) { + for (int I = 0; I < static_cast(InlineCostFeatures::NumberOfFeatures); + ++I) { + Cost[I] -= std::max(0, Threshold - OtherFeatures[I]); + } + } + + int reduce() const { return reduceCostFeatures(Cost); } + + const InlineCostFeaturesArray &get() const { return Cost; } + +private: + CostType Cost = {}; +}; + /// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note /// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer +template class InlineCostCallAnalyzer final : public CallAnalyzer { const int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1; const bool ComputeFullInlineCost; @@ -477,7 +563,7 @@ /// instructions expected to be executed for a given function invocation. /// Instructions that are statically proven to be dead based on call-site /// arguments are not counted here. - int Cost = 0; + CostUpdater Cost = {}; // The cumulative cost at the beginning of the basic block being analyzed. At // the end of analyzing each basic block, "Cost - CostAtBBStart" represents @@ -523,30 +609,35 @@ auto CostIt = SROAArgCosts.find(Arg); if (CostIt == SROAArgCosts.end()) return; - addCost(CostIt->second); + Cost.add(InlineCostFeatures::SROALosses, CostIt->second); SROACostSavings -= CostIt->second; SROACostSavingsLost += CostIt->second; SROAArgCosts.erase(CostIt); } void onDisableLoadElimination() override { - addCost(LoadEliminationCost); + Cost.add(InlineCostFeatures::LoadElimination, LoadEliminationCost); LoadEliminationCost = 0; } - void onCallPenalty() override { addCost(InlineConstants::CallPenalty); } + void onCallPenalty() override { + Cost.add(InlineCostFeatures::CallPenalty, InlineConstants::CallPenalty); + } void onCallArgumentSetup(const CallBase &Call) override { // Pay the price of the argument setup. We account for the average 1 // instruction per call argument setup here. - addCost(Call.arg_size() * InlineConstants::InstrCost); + Cost.add(InlineCostFeatures::CallArgumentSetup, + Call.arg_size() * InlineConstants::InstrCost); } void onLoadRelativeIntrinsic() override { // This is normally lowered to 4 LLVM instructions. - addCost(3 * InlineConstants::InstrCost); + Cost.add(InlineCostFeatures::LoadRelativeIntrinsic, + 3 * InlineConstants::InstrCost); } void onLoweredCall(Function *F, CallBase &Call, bool IsIndirectCall) override { // We account for the average 1 instruction per call argument setup here. - addCost(Call.arg_size() * InlineConstants::InstrCost); + Cost.add(InlineCostFeatures::LoweredCallArgSetup, + Call.arg_size() * InlineConstants::InstrCost); // If we have a constant that we are calling as a function, we can peer // through it and see the function target. This happens not infrequently @@ -564,11 +655,12 @@ if (CA.analyze().isSuccess()) { // We were able to inline the indirect call! Subtract the cost from the // threshold to get the bonus we want to apply, but don't go below zero. - Cost -= std::max(0, CA.getThreshold() - CA.getCost()); + Cost.accumulateBonus(CA.getCost(), CA.getThreshold()); } } else // Otherwise simply add the cost for merely making the call. - addCost(InlineConstants::CallPenalty); + Cost.add(InlineCostFeatures::IndirectCallPenalty, + InlineConstants::CallPenalty); } void onFinalizeSwitch(unsigned JumpTableSize, @@ -580,7 +672,8 @@ int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost + 4 * InlineConstants::InstrCost; - addCost(JTCost, (int64_t)CostUpperBound); + Cost.add(InlineCostFeatures::JumpTablePenalty, JTCost, + (int64_t)CostUpperBound); return; } // Considering forming a binary search, we should find the number of nodes @@ -600,7 +693,8 @@ // n + n / 2 - 1 = n * 3 / 2 - 1 if (NumCaseCluster <= 3) { // Suppose a comparison includes one compare and one conditional branch. - addCost(NumCaseCluster * 2 * InlineConstants::InstrCost); + Cost.add(InlineCostFeatures::CaseClusterPenalty, + NumCaseCluster * 2 * InlineConstants::InstrCost); return; } @@ -608,10 +702,12 @@ int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost; - addCost(SwitchCost, (int64_t)CostUpperBound); + Cost.add(InlineCostFeatures::SwitchPenalty, SwitchCost, + static_cast(CostUpperBound)); } void onMissedSimplification() override { - addCost(InlineConstants::InstrCost); + Cost.add(InlineCostFeatures::UnsimplifiedCommonInstructions, + InlineConstants::InstrCost); } void onInitializeSROAArg(AllocaInst *Arg) override { @@ -626,9 +722,12 @@ "expected this argument to have a cost"); CostIt->second += InlineConstants::InstrCost; SROACostSavings += InlineConstants::InstrCost; + Cost.add(InlineCostFeatures::SROASavings, InlineConstants::InstrCost); } - void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; } + void onBlockStart(const BasicBlock *BB) override { + CostAtBBStart = Cost.reduce(); + } void onBlockAnalyzed(const BasicBlock *BB) override { if (CostBenefitAnalysisEnabled) { @@ -640,7 +739,7 @@ auto ProfileCount = BFI->getBlockProfileCount(BB); assert(ProfileCount.hasValue()); if (ProfileCount.getValue() == 0) - ColdSize += Cost - CostAtBBStart; + ColdSize += Cost.reduce() - CostAtBBStart; } auto *TI = BB->getTerminator(); @@ -652,6 +751,8 @@ // Take off the bonus we applied to the threshold. Threshold -= SingleBBBonus; SingleBB = false; + + Cost.add(InlineCostFeatures::IsMultipleBlocks, 1); } } @@ -660,7 +761,7 @@ // the given instruction was assessed. if (!PrintInstructionComments) return; - InstructionCostDetailMap[I].CostBefore = Cost; + InstructionCostDetailMap[I].CostBefore = Cost.reduce(); InstructionCostDetailMap[I].ThresholdBefore = Threshold; } @@ -669,7 +770,7 @@ // the instruction has been assessed. if (!PrintInstructionComments) return; - InstructionCostDetailMap[I].CostAfter = Cost; + InstructionCostDetailMap[I].CostAfter = Cost.reduce(); InstructionCostDetailMap[I].ThresholdAfter = Threshold; } @@ -782,7 +883,7 @@ CycleSavings *= CallerBFI->getBlockProfileCount(CallerBB).getValue(); // Remove the cost of the cold basic blocks. - int Size = Cost - ColdSize; + int Size = Cost.reduce() - ColdSize; // Allow tiny callees to be inlined regardless of whether they meet the // savings threshold. @@ -821,16 +922,25 @@ continue; NumLoops++; } - addCost(NumLoops * InlineConstants::CallPenalty); + Cost.add(InlineCostFeatures::NumLoops, + NumLoops * InlineConstants::CallPenalty); } + Cost.add(InlineCostFeatures::DeadBlocks, DeadBlocks.size()); + Cost.add(InlineCostFeatures::SimplifiedInstructions, + NumInstructionsSimplified); + Cost.add(InlineCostFeatures::ConstantArgs, NumConstantArgs); + Cost.add(InlineCostFeatures::ConstantOffsetPtrArgs, + NumConstantOffsetPtrArgs); + // We applied the maximum possible vector bonus at the beginning. Now, // subtract the excess bonus, if any, from the Threshold before // comparing against Cost. - if (NumVectorInstructions <= NumInstructions / 10) + if (NumVectorInstructions <= NumInstructions / 10) { Threshold -= VectorBonus; - else if (NumVectorInstructions <= NumInstructions / 2) + } else if (NumVectorInstructions <= NumInstructions / 2) { Threshold -= VectorBonus / 2; + } if (auto Result = costBenefitAnalysis()) { DecidedByCostBenefit = true; @@ -840,14 +950,15 @@ return InlineResult::failure("Cost over threshold."); } - if (IgnoreThreshold || Cost < std::max(1, Threshold)) + if (IgnoreThreshold || Cost.reduce() < std::max(1, Threshold)) return InlineResult::success(); return InlineResult::failure("Cost over threshold."); } bool shouldStop() override { // Bail out the moment we cross the threshold. This means we'll under-count // the cost, but only when undercounting doesn't matter. - return !IgnoreThreshold && Cost >= Threshold && !ComputeFullInlineCost; + return !IgnoreThreshold && Cost.reduce() >= Threshold && + !ComputeFullInlineCost; } void onLoadEliminationOpportunity() override { @@ -885,15 +996,17 @@ // Give out bonuses for the callsite, as the instructions setting them up // will be gone after inlining. - addCost(-getCallsiteCost(this->CandidateCall, DL)); + Cost.add(InlineCostFeatures::CallSiteCost, + -1 * getCallsiteCost(this->CandidateCall, DL)); // If this function uses the coldcc calling convention, prefer not to inline // it. if (F.getCallingConv() == CallingConv::Cold) - Cost += InlineConstants::ColdccPenalty; + Cost.addUnchecked(InlineCostFeatures::ColdCcPenalty, + InlineConstants::ColdccPenalty); // Check if we're done. This can happen due to bonuses and penalties. - if (Cost >= Threshold && !ComputeFullInlineCost) + if (Cost.shouldStop(Threshold) && !ComputeFullInlineCost) return InlineResult::failure("high cost"); return InlineResult::success(); @@ -918,7 +1031,7 @@ Writer(this) {} /// Annotation Writer for instruction details - InlineCostAnnotationWriter Writer; + InlineCostAnnotationWriter Writer; void dump(); @@ -933,12 +1046,35 @@ } virtual ~InlineCostCallAnalyzer() {} - int getThreshold() { return Threshold; } - int getCost() { return Cost; } - bool wasDecidedByCostBenefit() { return DecidedByCostBenefit; } + bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; } + int getThreshold() const { return Threshold; } + typename CostUpdater::CostType getCost() const { return Cost.get(); } }; } // namespace +llvm::raw_ostream &operator<<(llvm::raw_ostream &O, + const InlineCostValueUpdater &Cost) { + O << Cost.reduce(); + return O; +} + +llvm::raw_ostream &operator<<(llvm::raw_ostream &O, + const InlineCostFeaturesUpdater &Cost) { + O << Cost.reduce(); + return O; +} + +int llvm::reduceCostFeatures(const InlineCostFeaturesArray &Features) { + int Sum = 0; + for (size_t I = 0; + I < static_cast(InlineCostFeatures::NumberOfFeatures); ++I) { + if (!isExclusiveMLFeature(static_cast(I))) { + Sum += Features[I]; + } + } + return Sum; +} + /// Test whether the given value is an Alloca-derived function argument. bool CallAnalyzer::isAllocaDerivedArg(Value *V) { return SROAArgValues.count(V); @@ -950,8 +1086,9 @@ disableLoadElimination(); } -void InlineCostAnnotationWriter::emitInstructionAnnot(const Instruction *I, - formatted_raw_ostream &OS) { +template +void InlineCostAnnotationWriter::emitInstructionAnnot( + const Instruction *I, formatted_raw_ostream &OS) { // The cost of inlining of the given instruction is printed always. // The threshold delta is printed only when it is non-zero. It happens // when we decided to give a bonus at a particular instruction. @@ -1056,8 +1193,8 @@ // is needed to track stack usage during inlining. Type *Ty = I.getAllocatedType(); AllocatedSize = SaturatingMultiplyAdd( - AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty).getKnownMinSize(), - AllocatedSize); + AllocSize->getLimitedValue(), + DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize); if (AllocatedSize > InlineConstants::MaxSimplifiedDynamicAllocaToInline) HasDynamicAlloca = true; return false; @@ -1210,11 +1347,11 @@ if (!DisableGEPConstOperand) if (simplifyInstruction(I, [&](SmallVectorImpl &COps) { - SmallVector Indices; - for (unsigned int Index = 1 ; Index < COps.size() ; ++Index) + SmallVector Indices; + for (unsigned int Index = 1; Index < COps.size(); ++Index) Indices.push_back(COps[Index]); - return ConstantExpr::getGetElementPtr(I.getSourceElementType(), COps[0], - Indices, I.isInBounds()); + return ConstantExpr::getGetElementPtr( + I.getSourceElementType(), COps[0], Indices, I.isInBounds()); })) return true; @@ -1415,8 +1552,9 @@ return true; } -bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call, - BlockFrequencyInfo *CallerBFI) { +template +bool InlineCostCallAnalyzer::isColdCallSite( + CallBase &Call, BlockFrequencyInfo *CallerBFI) { // If global profile summary is available, then callsite's coldness is // determined based on that. if (PSI && PSI->hasProfileSummary()) @@ -1438,9 +1576,9 @@ return CallSiteFreq < CallerEntryFreq * ColdProb; } -Optional -InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call, - BlockFrequencyInfo *CallerBFI) { +template +Optional InlineCostCallAnalyzer::getHotCallSiteThreshold( + CallBase &Call, BlockFrequencyInfo *CallerBFI) { // If global profile summary is available, then callsite's hotness is // determined based on that. @@ -1466,7 +1604,9 @@ return None; } -void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) { +template +void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, + Function &Callee) { // If no size growth is allowed for this inlining, set Threshold to 0. if (!allowSizeGrowth(Call)) { Threshold = 0; @@ -1587,7 +1727,8 @@ // the cost of inlining it drops dramatically. It may seem odd to update // Cost in updateThreshold, but the bonus depends on the logic in this method. if (OnlyOneCallAndLocalLinkage) - Cost -= LastCallToStaticBonus; + Cost.addUnchecked(InlineCostFeatures::LastCallToStaticBonus, + -1 * LastCallToStaticBonus); } bool CallAnalyzer::visitCmpInst(CmpInst &I) { @@ -1949,9 +2090,9 @@ } // Select condition is a constant. - Value *SelectedV = CondC->isAllOnesValue() - ? TrueVal - : (CondC->isNullValue()) ? FalseVal : nullptr; + Value *SelectedV = CondC->isAllOnesValue() ? TrueVal + : (CondC->isNullValue()) ? FalseVal + : nullptr; if (!SelectedV) { // Condition is a vector constant that is not all 1s or all 0s. If all // operands are constants, ConstantExpr::getSelect() can handle the cases @@ -2394,7 +2535,7 @@ return finalizeAnalysis(); } -void InlineCostCallAnalyzer::print() { +template void InlineCostCallAnalyzer::print() { #define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n" if (PrintInstructionComments) F.print(dbgs(), &Writer); @@ -2416,7 +2557,8 @@ #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Dump stats about this call's analysis. -LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { +template +LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(); } #endif @@ -2480,7 +2622,7 @@ GetAssumptionCache, GetTLI, GetBFI, PSI, ORE); } -Optional llvm::getInliningCostEstimate( +Optional llvm::getInliningCostFeatures( CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref GetAssumptionCache, function_ref GetBFI, @@ -2496,13 +2638,12 @@ /*ComputeFullInlineCost*/ true, /*EnableDeferral*/ true}; - InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI, - GetAssumptionCache, GetBFI, PSI, ORE, true, - /*IgnoreThreshold*/ true); + InlineCostCallAnalyzer CA( + *Call.getCalledFunction(), Call, Params, CalleeTTI, GetAssumptionCache, + GetBFI, PSI, ORE, true, + /*IgnoreThreshold*/ true); auto R = CA.analyze(); - if (!R.isSuccess()) - return None; - return CA.getCost(); + return R.isSuccess() ? CA.getCost() : Optional(None); } Optional llvm::getAttributeBasedInliningDecision( @@ -2603,8 +2744,8 @@ << "... (caller:" << Call.getCaller()->getName() << ")\n"); - InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI, - GetAssumptionCache, GetBFI, PSI, ORE); + InlineCostCallAnalyzer CA( + *Callee, Call, Params, CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE); InlineResult ShouldInline = CA.analyze(); LLVM_DEBUG(CA.dump()); @@ -2772,8 +2913,8 @@ InlineCostAnnotationPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) { PrintInstructionComments = true; - std::function GetAssumptionCache = [&]( - Function &F) -> AssumptionCache & { + std::function GetAssumptionCache = + [&](Function &F) -> AssumptionCache & { return FAM.getResult(F); }; Module *M = F.getParent(); @@ -2793,8 +2934,9 @@ if (!CalledFunction || CalledFunction->isDeclaration()) continue; OptimizationRemarkEmitter ORE(CalledFunction); - InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params, TTI, - GetAssumptionCache, nullptr, &PSI, &ORE); + InlineCostCallAnalyzer ICCA( + *CalledFunction, *CI, Params, TTI, GetAssumptionCache, nullptr, + &PSI, &ORE); ICCA.analyze(); OS << " Analyzing call of " << CalledFunction->getName() << "... (caller:" << CI->getCaller()->getName() << ")\n"; diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp --- a/llvm/lib/Analysis/MLInlineAdvisor.cpp +++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp @@ -43,11 +43,19 @@ "blocking any further inlining."), cl::init(2.0)); +// clang-format off const std::array llvm::FeatureNameMap{ +// InlineCost features - these must come first +#define POPULATE_NAMES(INDEX_NAME, NAME) NAME, + INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES) +#undef POPULATE_NAMES + +// Non-cost features #define POPULATE_NAMES(INDEX_NAME, NAME, COMMENT) NAME, - INLINE_FEATURE_ITERATOR(POPULATE_NAMES) + INLINE_FEATURE_ITERATOR(POPULATE_NAMES) #undef POPULATE_NAMES }; +// clang-format on const char *const llvm::DecisionName = "inlining_decision"; const char *const llvm::DefaultDecisionName = "inlining_default"; @@ -204,17 +212,17 @@ return std::make_unique(this, CB, ORE, Mandatory); } - int CostEstimate = 0; + InlineCostFeaturesArray CostFeatures; if (!Mandatory) { auto IsCallSiteInlinable = - llvm::getInliningCostEstimate(CB, TIR, GetAssumptionCache); + llvm::getInliningCostFeatures(CB, TIR, GetAssumptionCache); if (!IsCallSiteInlinable) { // We can't inline this for correctness reasons, so return the base // InlineAdvice, as we don't care about tracking any state changes (which // won't happen). return std::make_unique(this, CB, ORE, false); } - CostEstimate = *IsCallSiteInlinable; + CostFeatures = std::move(*IsCallSiteInlinable); } if (Mandatory) @@ -234,7 +242,6 @@ FunctionLevels[&Caller]); ModelRunner->setFeature(FeatureIndex::NodeCount, NodeCount); ModelRunner->setFeature(FeatureIndex::NrCtantParams, NrCtantParams); - ModelRunner->setFeature(FeatureIndex::CostEstimate, CostEstimate); ModelRunner->setFeature(FeatureIndex::EdgeCount, EdgeCount); ModelRunner->setFeature(FeatureIndex::CallerUsers, CallerBefore.Uses); ModelRunner->setFeature(FeatureIndex::CallerConditionallyExecutedBlocks, @@ -244,6 +251,17 @@ ModelRunner->setFeature(FeatureIndex::CalleeConditionallyExecutedBlocks, CalleeBefore.BlocksReachedFromConditionalInstruction); ModelRunner->setFeature(FeatureIndex::CalleeUsers, CalleeBefore.Uses); + ModelRunner->setFeature(FeatureIndex::CostEstimate, + reduceCostFeatures(CostFeatures)); + + // Add the cost features + for (size_t I = 0; + I < static_cast(InlineCostFeatures::NumberOfFeatures); ++I) { + ModelRunner->setFeature( + inlineCostFeatureToMlFeature(static_cast(I)), + CostFeatures[I]); + } + return getAdviceFromModel(CB, ORE); }