Index: include/llvm/Analysis/InlineCost.h =================================================================== --- include/llvm/Analysis/InlineCost.h +++ include/llvm/Analysis/InlineCost.h @@ -16,6 +16,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include #include @@ -179,12 +180,11 @@ /// /// Also note that calling this function *dynamically* computes the cost of /// inlining the callsite. It is an expensive, heavyweight call. -InlineCost -getInlineCost(CallSite CS, const InlineParams &Params, - TargetTransformInfo &CalleeTTI, - std::function &GetAssumptionCache, - Optional> GetBFI, - ProfileSummaryInfo *PSI); +InlineCost getInlineCost( + CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, + std::function &GetAssumptionCache, + Optional> GetBFI, + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE = nullptr); /// \brief Get an InlineCost with the callee explicitly specified. /// This allows you to calculate the cost of inlining a function via a @@ -196,7 +196,7 @@ TargetTransformInfo &CalleeTTI, std::function &GetAssumptionCache, Optional> GetBFI, - ProfileSummaryInfo *PSI); + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE); /// \brief Minimal filter to detect invalid constructs for inlining. bool isInlineViable(Function &Callee); Index: lib/Analysis/InlineCost.cpp =================================================================== --- lib/Analysis/InlineCost.cpp +++ lib/Analysis/InlineCost.cpp @@ -82,6 +82,11 @@ "entry frequency, for a callsite to be hot in the absence of " "profile information.")); +static cl::opt ComputeFullInlineCost( + "inline-cost-full", cl::Hidden, cl::init(false), + cl::desc("Compute the full inline cost of a call site even when the cost " + "exceeds the threshold.")); + namespace { class CallAnalyzer : public InstVisitor { @@ -1198,7 +1203,7 @@ std::min((int64_t)CostUpperBound, (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost); - if (CostLowerBound > Threshold) { + if (CostLowerBound > Threshold && !ComputeFullInlineCost) { Cost = CostLowerBound; return false; } @@ -1361,7 +1366,7 @@ // Check if we've past the maximum possible threshold so we don't spin in // huge basic blocks that will never inline. - if (Cost > Threshold) + if (Cost > Threshold && !ComputeFullInlineCost) return false; } @@ -1447,7 +1452,7 @@ Cost += InlineConstants::ColdccPenalty; // Check if we're done. This can happen due to bonuses and penalties. - if (Cost > Threshold) + if (Cost > Threshold && !ComputeFullInlineCost) return false; if (F.empty()) @@ -1513,7 +1518,7 @@ for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { // Bail out the moment we cross the threshold. This means we'll under-count // the cost, but only when undercounting doesn't matter. - if (Cost > Threshold) + if (Cost > Threshold && !ComputeFullInlineCost) break; BasicBlock *BB = BBWorklist[Idx]; @@ -1657,9 +1662,9 @@ CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function &GetAssumptionCache, Optional> GetBFI, - ProfileSummaryInfo *PSI) { + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI, - GetAssumptionCache, GetBFI, PSI); + GetAssumptionCache, GetBFI, PSI, ORE); } InlineCost llvm::getInlineCost( @@ -1667,7 +1672,7 @@ TargetTransformInfo &CalleeTTI, std::function &GetAssumptionCache, Optional> GetBFI, - ProfileSummaryInfo *PSI) { + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { // Cannot inline indirect calls. if (!Callee) @@ -1699,6 +1704,9 @@ CS.isNoInline()) return llvm::InlineCost::getNever(); + if (ORE) + ComputeFullInlineCost = true; + DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); Index: lib/Transforms/IPO/InlineSimple.cpp =================================================================== --- lib/Transforms/IPO/InlineSimple.cpp +++ lib/Transforms/IPO/InlineSimple.cpp @@ -57,12 +57,13 @@ InlineCost getInlineCost(CallSite CS) override { Function *Callee = CS.getCalledFunction(); TargetTransformInfo &TTI = TTIWP->getTTI(*Callee); + OptimizationRemarkEmitter ORE(CS.getCaller()); std::function GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }; return llvm::getInlineCost(CS, Params, TTI, GetAssumptionCache, - /*GetBFI=*/None, PSI); + /*GetBFI=*/None, PSI, &ORE); } bool runOnSCC(CallGraphSCC &SCC) override; Index: lib/Transforms/IPO/Inliner.cpp =================================================================== --- lib/Transforms/IPO/Inliner.cpp +++ lib/Transforms/IPO/Inliner.cpp @@ -840,6 +840,10 @@ FunctionAnalysisManager &FAM = AM.getResult(*C, CG) .getManager(); + + // Get the remarks emission analysis for the caller. + auto &ORE = FAM.getResult(F); + std::function GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return FAM.getResult(F); @@ -852,12 +856,9 @@ Function &Callee = *CS.getCalledFunction(); auto &CalleeTTI = FAM.getResult(Callee); return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI}, - PSI); + PSI, &ORE); }; - // Get the remarks emission analysis for the caller. - auto &ORE = FAM.getResult(F); - // Now process as many calls as we have within this caller in the sequnece. // We bail out as soon as the caller has to change so we can update the // call graph and prepare the context of that new caller. Index: lib/Transforms/IPO/PartialInlining.cpp =================================================================== --- lib/Transforms/IPO/PartialInlining.cpp +++ lib/Transforms/IPO/PartialInlining.cpp @@ -462,7 +462,7 @@ Function *Caller = CS.getCaller(); auto &CalleeTTI = (*GetTTI)(*Callee); InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI, - *GetAssumptionCache, GetBFI, PSI); + *GetAssumptionCache, GetBFI, PSI, &ORE); if (IC.isAlways()) { ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)