Index: llvm/trunk/include/llvm/Analysis/InlineCost.h =================================================================== --- llvm/trunk/include/llvm/Analysis/InlineCost.h +++ llvm/trunk/include/llvm/Analysis/InlineCost.h @@ -16,6 +16,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include #include @@ -185,12 +186,11 @@ /// /// Also note that calling this function *dynamically* computes the cost of /// inlining the callsite. It is an expensive, heavyweight call. -InlineCost -getInlineCost(CallSite CS, const InlineParams &Params, - TargetTransformInfo &CalleeTTI, - std::function &GetAssumptionCache, - Optional> GetBFI, - ProfileSummaryInfo *PSI); +InlineCost getInlineCost( + CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, + std::function &GetAssumptionCache, + Optional> GetBFI, + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE = nullptr); /// \brief Get an InlineCost with the callee explicitly specified. /// This allows you to calculate the cost of inlining a function via a @@ -202,7 +202,7 @@ TargetTransformInfo &CalleeTTI, std::function &GetAssumptionCache, Optional> GetBFI, - ProfileSummaryInfo *PSI); + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE); /// \brief Minimal filter to detect invalid constructs for inlining. bool isInlineViable(Function &Callee); Index: llvm/trunk/lib/Analysis/InlineCost.cpp =================================================================== --- llvm/trunk/lib/Analysis/InlineCost.cpp +++ llvm/trunk/lib/Analysis/InlineCost.cpp @@ -82,6 +82,11 @@ "entry frequency, for a callsite to be hot in the absence of " "profile information.")); +static cl::opt ComputeFullInlineCost( + "inline-cost-full", cl::Hidden, cl::init(false), + cl::desc("Compute the full inline cost of a call site even when the cost " + "exceeds the threshold.")); + namespace { class CallAnalyzer : public InstVisitor { @@ -106,6 +111,9 @@ // Cache the DataLayout since we use it a lot. const DataLayout &DL; + /// The OptimizationRemarkEmitter available for this compilation. + OptimizationRemarkEmitter *ORE; + /// The candidate callsite being analyzed. Please do not use this to do /// analysis in the caller function; we want the inline cost query to be /// easily cacheable. Instead, use the cover function paramHasAttr. @@ -243,10 +251,10 @@ CallAnalyzer(const TargetTransformInfo &TTI, std::function &GetAssumptionCache, Optional> &GetBFI, - ProfileSummaryInfo *PSI, Function &Callee, CallSite CSArg, - const InlineParams &Params) + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, + Function &Callee, CallSite CSArg, const InlineParams &Params) : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI), - PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), + PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE), CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold), Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), @@ -1138,7 +1146,7 @@ // out. Pretend to inline the function, with a custom threshold. auto IndirectCallParams = Params; IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold; - CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, *F, CS, + CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, CS, IndirectCallParams); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! Subtract the cost from the @@ -1198,7 +1206,7 @@ std::min((int64_t)CostUpperBound, (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost); - if (CostLowerBound > Threshold) { + if (CostLowerBound > Threshold && !ComputeFullInlineCost) { Cost = CostLowerBound; return false; } @@ -1347,21 +1355,36 @@ else Cost += InlineConstants::InstrCost; + using namespace ore; // If the visit this instruction detected an uninlinable pattern, abort. if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || - HasIndirectBr || HasFrameEscape) + HasIndirectBr || HasFrameEscape) { + if (ORE) + ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", + CandidateCS.getInstruction()) + << NV("Callee", &F) + << " has uninlinable pattern and cost is not fully computed"); return false; + } // If the caller is a recursive function then we don't want to inline // functions which allocate a lot of stack space because it would increase // the caller stack usage dramatically. if (IsCallerRecursive && - AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) + AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) { + if (ORE) + ORE->emit( + OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", + CandidateCS.getInstruction()) + << NV("Callee", &F) + << " is recursive and allocates too much stack space. Cost is " + "not fully computed"); return false; + } // Check if we've past the maximum possible threshold so we don't spin in // huge basic blocks that will never inline. - if (Cost > Threshold) + if (Cost > Threshold && !ComputeFullInlineCost) return false; } @@ -1447,7 +1470,7 @@ Cost += InlineConstants::ColdccPenalty; // Check if we're done. This can happen due to bonuses and penalties. - if (Cost > Threshold) + if (Cost > Threshold && !ComputeFullInlineCost) return false; if (F.empty()) @@ -1513,7 +1536,7 @@ for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { // Bail out the moment we cross the threshold. This means we'll under-count // the cost, but only when undercounting doesn't matter. - if (Cost > Threshold) + if (Cost > Threshold && !ComputeFullInlineCost) break; BasicBlock *BB = BBWorklist[Idx]; @@ -1657,9 +1680,9 @@ CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function &GetAssumptionCache, Optional> GetBFI, - ProfileSummaryInfo *PSI) { + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI, - GetAssumptionCache, GetBFI, PSI); + GetAssumptionCache, GetBFI, PSI, ORE); } InlineCost llvm::getInlineCost( @@ -1667,7 +1690,7 @@ TargetTransformInfo &CalleeTTI, std::function &GetAssumptionCache, Optional> GetBFI, - ProfileSummaryInfo *PSI) { + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { // Cannot inline indirect calls. if (!Callee) @@ -1699,10 +1722,13 @@ CS.isNoInline()) return llvm::InlineCost::getNever(); + if (ORE) + ComputeFullInlineCost = true; + DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "... (caller:" << Caller->getName() << ")\n"); - CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, *Callee, CS, + CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS, Params); bool ShouldInline = CA.analyzeCall(CS); Index: llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp =================================================================== --- llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp +++ llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp @@ -57,12 +57,13 @@ InlineCost getInlineCost(CallSite CS) override { Function *Callee = CS.getCalledFunction(); TargetTransformInfo &TTI = TTIWP->getTTI(*Callee); + OptimizationRemarkEmitter ORE(CS.getCaller()); std::function GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }; return llvm::getInlineCost(CS, Params, TTI, GetAssumptionCache, - /*GetBFI=*/None, PSI); + /*GetBFI=*/None, PSI, &ORE); } bool runOnSCC(CallGraphSCC &SCC) override; Index: llvm/trunk/lib/Transforms/IPO/Inliner.cpp =================================================================== --- llvm/trunk/lib/Transforms/IPO/Inliner.cpp +++ llvm/trunk/lib/Transforms/IPO/Inliner.cpp @@ -845,6 +845,10 @@ FunctionAnalysisManager &FAM = AM.getResult(*C, CG) .getManager(); + + // Get the remarks emission analysis for the caller. + auto &ORE = FAM.getResult(F); + std::function GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return FAM.getResult(F); @@ -857,12 +861,9 @@ Function &Callee = *CS.getCalledFunction(); auto &CalleeTTI = FAM.getResult(Callee); return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI}, - PSI); + PSI, &ORE); }; - // Get the remarks emission analysis for the caller. - auto &ORE = FAM.getResult(F); - // Now process as many calls as we have within this caller in the sequnece. // We bail out as soon as the caller has to change so we can update the // call graph and prepare the context of that new caller. Index: llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp =================================================================== --- llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp +++ llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp @@ -462,7 +462,7 @@ Function *Caller = CS.getCaller(); auto &CalleeTTI = (*GetTTI)(*Callee); InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI, - *GetAssumptionCache, GetBFI, PSI); + *GetAssumptionCache, GetBFI, PSI, &ORE); if (IC.isAlways()) { ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)