Move inline-threshold selection out of the Inliner pass and into the inline
cost analysis.

Summary of what the hunks below do:
  * The -inline-threshold, -inlinehint-threshold and -inlinecold-threshold
    options and the OptSizeThreshold constant are removed from
    lib/Transforms/IPO/Inliner.cpp and added to lib/Analysis/InlineCost.cpp,
    together with new OptMinSizeThreshold / OptAggressiveThreshold constants.
  * Inliner::getInlineThreshold() and the Inliner::InlineThreshold member are
    removed; the per-callsite adjustment is re-added as
    CallAnalyzer::updateThreshold(), called from the analysis before the
    vector bonuses are derived from Threshold.
  * llvm::getInlineCost() now takes a DefaultThreshold; SimpleInliner stores
    that value itself and AlwaysInliner no longer passes a threshold.
  * computeThresholdFromOptLevels() becomes a public llvm:: function and
    getDefaultInlineThreshold() is added.

NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Line breaks were rebuilt from the hunk headers (every hunk's line counts
check out), but leading indentation and runs of spaces inside lines are
reconstructed and may not match the target tree byte-for-byte. Apply with
`patch -l` (ignore whitespace) and confirm against the original revision.
Template arguments that had been stripped as if they were markup
(cl::opt<int>, InstVisitor<CallAnalyzer, bool>, SmallPtrSetImpl<const Value *>)
have been restored.

Index: llvm/trunk/include/llvm/Analysis/InlineCost.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/InlineCost.h
+++ llvm/trunk/include/llvm/Analysis/InlineCost.h
@@ -101,14 +101,15 @@
 /// \brief Get an InlineCost object representing the cost of inlining this
 /// callsite.
 ///
-/// Note that threshold is passed into this function. Only costs below the
-/// threshold are computed with any accuracy. The threshold can be used to
-/// bound the computation necessary to determine whether the cost is
+/// Note that a default threshold is passed into this function. This threshold
+/// could be modified based on callsite's properties and only costs below this
+/// new threshold are computed with any accuracy. The new threshold can be
+/// used to bound the computation necessary to determine whether the cost is
 /// sufficiently low to warrant inlining.
 ///
 /// Also note that calling this function *dynamically* computes the cost of
 /// inlining the callsite. It is an expensive, heavyweight call.
-InlineCost getInlineCost(CallSite CS, int Threshold,
+InlineCost getInlineCost(CallSite CS, int DefaultThreshold,
                          TargetTransformInfo &CalleeTTI,
                          AssumptionCacheTracker *ACT);
 
@@ -117,10 +118,15 @@
 /// pointer. This behaves exactly as the version with no explicit callee
 /// parameter in all other respects.
 //
-InlineCost getInlineCost(CallSite CS, Function *Callee, int Threshold,
+InlineCost getInlineCost(CallSite CS, Function *Callee, int DefaultThreshold,
                          TargetTransformInfo &CalleeTTI,
                          AssumptionCacheTracker *ACT);
 
+int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel);
+
+/// \brief Return the default value of -inline-threshold.
+int getDefaultInlineThreshold();
+
 /// \brief Minimal filter to detect invalid constructs for inlining.
 bool isInlineViable(Function &Callee);
 }
Index: llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h
===================================================================
--- llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h
+++ llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h
@@ -31,7 +31,7 @@
 ///
 struct Inliner : public CallGraphSCCPass {
   explicit Inliner(char &ID);
-  explicit Inliner(char &ID, int Threshold, bool InsertLifetime);
+  explicit Inliner(char &ID, bool InsertLifetime);
 
   /// getAnalysisUsage - For this class, we declare that we require and preserve
   /// the call graph. If the derived class implements this method, it should
@@ -47,18 +47,6 @@
   // processing to avoid breaking the SCC traversal.
   bool doFinalization(CallGraph &CG) override;
 
-  /// This method returns the value specified by the -inline-threshold value,
-  /// specified on the command line. This is typically not directly needed.
-  ///
-  unsigned getInlineThreshold() const { return InlineThreshold; }
-
-  /// Calculate the inline threshold for given Caller. This threshold is lower
-  /// if the caller is marked with OptimizeForSize and -inline-threshold is not
-  /// given on the comand line. It is higher if the callee is marked with the
-  /// inlinehint attribute.
-  ///
-  unsigned getInlineThreshold(CallSite CS) const;
-
   /// getInlineCost - This method must be implemented by the subclass to
   /// determine the cost of inlining the specified call site. If the cost
   /// returned is greater than the current inline threshold, the call site is
@@ -75,9 +63,6 @@
   bool removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly = false);
 
 private:
-  // InlineThreshold - Cache the value here for easy access.
-  unsigned InlineThreshold;
-
   // InsertLifetime - Insert @llvm.lifetime intrinsics.
   bool InsertLifetime;
 
Index: llvm/trunk/lib/Analysis/InlineCost.cpp
===================================================================
--- llvm/trunk/lib/Analysis/InlineCost.cpp
+++ llvm/trunk/lib/Analysis/InlineCost.cpp
@@ -39,6 +39,32 @@
 
 STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
 
+// Threshold to use when optsize is specified (and there is no
+// -inline-threshold).
+const int OptSizeThreshold = 75;
+
+// Threshold to use when -Oz is specified (and there is no -inline-threshold).
+const int OptMinSizeThreshold = 25;
+
+// Threshold to use when -O[34] is specified (and there is no
+// -inline-threshold).
+const int OptAggressiveThreshold = 275;
+
+static cl::opt<int> DefaultInlineThreshold(
+    "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
+    cl::desc("Control the amount of inlining to perform (default = 225)"));
+
+static cl::opt<int> HintThreshold(
+    "inlinehint-threshold", cl::Hidden, cl::init(325),
+    cl::desc("Threshold for inlining functions with inline hint"));
+
+// We introduce this threshold to help performance of instrumentation based
+// PGO before we actually hook up inliner with analysis passes such as BPI and
+// BFI.
+static cl::opt<int> ColdThreshold(
+    "inlinecold-threshold", cl::Hidden, cl::init(225),
+    cl::desc("Threshold for inlining functions with cold attribute"));
+
 namespace {
 
 class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
@@ -122,6 +148,12 @@
   /// inlined through this particular callsite.
   bool isKnownNonNullInCallee(Value *V);
 
+  /// Update Threshold based on callsite properties such as callee
+  /// attributes and callee hotness for PGO builds. The Callee is explicitly
+  /// passed to support analyzing indirect calls whose target is inferred by
+  /// analysis.
+  void updateThreshold(CallSite CS, Function &Callee);
+
   // Custom analysis routines.
   bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
 
@@ -541,6 +573,56 @@
   return false;
 }
 
+void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
+  // If -inline-threshold is not given, listen to the optsize attribute when it
+  // would decrease the threshold.
+  Function *Caller = CS.getCaller();
+
+  // FIXME: Use Function::optForSize()
+  bool OptSize = Caller->hasFnAttribute(Attribute::OptimizeForSize);
+
+  if (!(DefaultInlineThreshold.getNumOccurrences() > 0) && OptSize &&
+      OptSizeThreshold < Threshold)
+    Threshold = OptSizeThreshold;
+
+  // If profile information is available, use that to adjust threshold of hot
+  // and cold functions.
+  // FIXME: The heuristic used below for determining hotness and coldness are
+  // based on preliminary SPEC tuning and may not be optimal. Replace this with
+  // a well-tuned heuristic based on *callsite* hotness and not callee hotness.
+  uint64_t FunctionCount = 0, MaxFunctionCount = 0;
+  bool HasPGOCounts = false;
+  if (Callee.getEntryCount() && Callee.getParent()->getMaximumFunctionCount()) {
+    HasPGOCounts = true;
+    FunctionCount = Callee.getEntryCount().getValue();
+    MaxFunctionCount = Callee.getParent()->getMaximumFunctionCount().getValue();
+  }
+
+  // Listen to the inlinehint attribute or profile based hotness information
+  // when it would increase the threshold and the caller does not need to
+  // minimize its size.
+  bool InlineHint =
+      Callee.hasFnAttribute(Attribute::InlineHint) ||
+      (HasPGOCounts &&
+       FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount));
+  if (InlineHint && HintThreshold > Threshold && !Caller->optForMinSize())
+    Threshold = HintThreshold;
+
+  // Listen to the cold attribute or profile based coldness information
+  // when it would decrease the threshold.
+  bool ColdCallee =
+      Callee.hasFnAttribute(Attribute::Cold) ||
+      (HasPGOCounts &&
+       FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount));
+  // Command line argument for DefaultInlineThreshold will override the default
+  // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
+  // do not use the default cold threshold even if it is smaller.
+  if ((DefaultInlineThreshold.getNumOccurrences() == 0 ||
+       ColdThreshold.getNumOccurrences() > 0) &&
+      ColdCallee && ColdThreshold < Threshold)
+    Threshold = ColdThreshold;
+}
+
 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
   // First try to handle simplified comparisons.
@@ -1079,6 +1161,10 @@
   // nice to base the bonus values on something more scientific.
   assert(NumInstructions == 0);
   assert(NumVectorInstructions == 0);
+
+  // Update the threshold based on callsite properties
+  updateThreshold(CS, F);
+
   FiftyPercentVectorBonus = 3 * Threshold / 2;
   TenPercentVectorBonus = 3 * Threshold / 4;
   const DataLayout &DL = F.getParent()->getDataLayout();
@@ -1335,15 +1421,31 @@
          AttributeFuncs::areInlineCompatible(*Caller, *Callee);
 }
 
-InlineCost llvm::getInlineCost(CallSite CS, int Threshold,
+InlineCost llvm::getInlineCost(CallSite CS, int DefaultThreshold,
                                TargetTransformInfo &CalleeTTI,
                                AssumptionCacheTracker *ACT) {
-  return getInlineCost(CS, CS.getCalledFunction(), Threshold, CalleeTTI, ACT);
+  return getInlineCost(CS, CS.getCalledFunction(), DefaultThreshold, CalleeTTI,
+                       ACT);
+}
+
+int llvm::computeThresholdFromOptLevels(unsigned OptLevel,
+                                        unsigned SizeOptLevel) {
+  if (OptLevel > 2)
+    return OptAggressiveThreshold;
+  if (SizeOptLevel == 1) // -Os
+    return OptSizeThreshold;
+  if (SizeOptLevel == 2) // -Oz
+    return OptMinSizeThreshold;
+  return DefaultInlineThreshold;
 }
 
-InlineCost llvm::getInlineCost(CallSite CS, Function *Callee, int Threshold,
+int llvm::getDefaultInlineThreshold() { return DefaultInlineThreshold; }
+
+InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
+                               int DefaultThreshold,
                                TargetTransformInfo &CalleeTTI,
                                AssumptionCacheTracker *ACT) {
+
   // Cannot inline indirect calls.
   if (!Callee)
     return llvm::InlineCost::getNever();
@@ -1375,7 +1477,7 @@
   DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
         << "...\n");
 
-  CallAnalyzer CA(CalleeTTI, ACT, *Callee, Threshold, CS);
+  CallAnalyzer CA(CalleeTTI, ACT, *Callee, DefaultThreshold, CS);
   bool ShouldInline = CA.analyzeCall(CS);
 
   DEBUG(CA.dump());
Index: llvm/trunk/lib/Transforms/IPO/InlineAlways.cpp
===================================================================
--- llvm/trunk/lib/Transforms/IPO/InlineAlways.cpp
+++ llvm/trunk/lib/Transforms/IPO/InlineAlways.cpp
@@ -37,13 +37,11 @@
 class AlwaysInliner : public Inliner {
 
 public:
-  // Use extremely low threshold.
-  AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true) {
+  AlwaysInliner() : Inliner(ID, /*InsertLifetime*/ true) {
     initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
   }
 
-  AlwaysInliner(bool InsertLifetime)
-      : Inliner(ID, -2000000000, InsertLifetime) {
+  AlwaysInliner(bool InsertLifetime) : Inliner(ID, InsertLifetime) {
     initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
   }
 
Index: llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp
===================================================================
--- llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp
+++ llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp
@@ -38,14 +38,19 @@
 /// inliner pass and the always inliner pass. The two passes use different cost
 /// analyses to determine when to inline.
 class SimpleInliner : public Inliner {
+  // This field is populated based on one of the following:
+  // optimization or size optimization levels,
+  // --inline-threshold flag,
+  // user specified value.
+  int DefaultThreshold;
 
 public:
-  SimpleInliner() : Inliner(ID) {
+  SimpleInliner()
+      : Inliner(ID), DefaultThreshold(llvm::getDefaultInlineThreshold()) {
     initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
   }
 
-  SimpleInliner(int Threshold)
-      : Inliner(ID, Threshold, /*InsertLifetime*/ true) {
+  SimpleInliner(int Threshold) : Inliner(ID), DefaultThreshold(Threshold) {
     initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
   }
 
@@ -54,7 +59,7 @@
   InlineCost getInlineCost(CallSite CS) override {
     Function *Callee = CS.getCalledFunction();
     TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
-    return llvm::getInlineCost(CS, getInlineThreshold(CS), TTI, ACT);
+    return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT);
   }
 
   bool runOnSCC(CallGraphSCC &SCC) override;
@@ -64,17 +69,6 @@
   TargetTransformInfoWrapperPass *TTIWP;
 };
 
-static int computeThresholdFromOptLevels(unsigned OptLevel,
-                                         unsigned SizeOptLevel) {
-  if (OptLevel > 2)
-    return 275;
-  if (SizeOptLevel == 1) // -Os
-    return 75;
-  if (SizeOptLevel == 2) // -Oz
-    return 25;
-  return 225;
-}
-
 } // end anonymous namespace
 
 char SimpleInliner::ID = 0;
@@ -96,7 +90,7 @@
 Pass *llvm::createFunctionInliningPass(unsigned OptLevel,
                                        unsigned SizeOptLevel) {
   return new SimpleInliner(
-    computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
+      llvm::computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
 }
 
 bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) {
Index: llvm/trunk/lib/Transforms/IPO/Inliner.cpp
===================================================================
--- llvm/trunk/lib/Transforms/IPO/Inliner.cpp
+++ llvm/trunk/lib/Transforms/IPO/Inliner.cpp
@@ -47,33 +47,10 @@
 // if those would be more profitable and blocked inline steps.
 STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
 
-static cl::opt<int>
-InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
-        cl::desc("Control the amount of inlining to perform (default = 225)"));
-
-static cl::opt<int>
-HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325),
-              cl::desc("Threshold for inlining functions with inline hint"));
-
-// We instroduce this threshold to help performance of instrumentation based
-// PGO before we actually hook up inliner with analysis passes such as BPI and
-// BFI.
-static cl::opt<int>
-ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(225),
-              cl::desc("Threshold for inlining functions with cold attribute"));
+Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {}
 
-// Threshold to use when optsize is specified (and there is no -inline-limit).
-const int OptSizeThreshold = 75;
-
-Inliner::Inliner(char &ID)
-    : CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) {
-}
-
-Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
-    : CallGraphSCCPass(ID),
-      InlineThreshold(InlineLimit.getNumOccurrences() > 0 ? InlineLimit
-                                                          : Threshold),
-      InsertLifetime(InsertLifetime) {}
+Inliner::Inliner(char &ID, bool InsertLifetime)
+    : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}
 
 /// For this class, we declare that we require and preserve the call graph.
 /// If the derived class implements this method, it should
@@ -243,67 +220,6 @@
   return true;
 }
 
-unsigned Inliner::getInlineThreshold(CallSite CS) const {
-  int Threshold = InlineThreshold; // -inline-threshold or else selected by
-                                   // overall opt level
-
-  // If -inline-threshold is not given, listen to the optsize attribute when it
-  // would decrease the threshold.
-  Function *Caller = CS.getCaller();
-  bool OptSize = Caller && !Caller->isDeclaration() &&
-                 // FIXME: Use Function::optForSize().
-                 Caller->hasFnAttribute(Attribute::OptimizeForSize);
-  if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
-      OptSizeThreshold < Threshold)
-    Threshold = OptSizeThreshold;
-
-  Function *Callee = CS.getCalledFunction();
-  if (!Callee || Callee->isDeclaration())
-    return Threshold;
-
-  // If profile information is available, use that to adjust threshold of hot
-  // and cold functions.
-  // FIXME: The heuristic used below for determining hotness and coldness are
-  // based on preliminary SPEC tuning and may not be optimal. Replace this with
-  // a well-tuned heuristic based on *callsite* hotness and not callee hotness.
-  uint64_t FunctionCount = 0, MaxFunctionCount = 0;
-  bool HasPGOCounts = false;
-  if (Callee->getEntryCount() &&
-      Callee->getParent()->getMaximumFunctionCount()) {
-    HasPGOCounts = true;
-    FunctionCount = Callee->getEntryCount().getValue();
-    MaxFunctionCount =
-        Callee->getParent()->getMaximumFunctionCount().getValue();
-  }
-
-  // Listen to the inlinehint attribute or profile based hotness information
-  // when it would increase the threshold and the caller does not need to
-  // minimize its size.
-  bool InlineHint =
-      Callee->hasFnAttribute(Attribute::InlineHint) ||
-      (HasPGOCounts &&
-       FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount));
-  if (InlineHint && HintThreshold > Threshold &&
-      !Caller->hasFnAttribute(Attribute::MinSize))
-    Threshold = HintThreshold;
-
-  // Listen to the cold attribute or profile based coldness information
-  // when it would decrease the threshold.
-  bool ColdCallee =
-      Callee->hasFnAttribute(Attribute::Cold) ||
-      (HasPGOCounts &&
-       FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount));
-  // Command line argument for InlineLimit will override the default
-  // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
-  // do not use the default cold threshold even if it is smaller.
-  if ((InlineLimit.getNumOccurrences() == 0 ||
-       ColdThreshold.getNumOccurrences() > 0) && ColdCallee &&
-      ColdThreshold < Threshold)
-    Threshold = ColdThreshold;
-
-  return Threshold;
-}
-
 static void emitAnalysis(CallSite CS, const Twine &Msg) {
   Function *Caller = CS.getCaller();
   LLVMContext &Ctx = Caller->getContext();