diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -416,20 +416,18 @@ findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const; mutable DenseMap DILocation2SampleMap; const FunctionSamples *findFunctionSamples(const Instruction &I) const; - CallBase *tryPromoteIndirectCall(Function &F, StringRef CalleeName, - uint64_t &Sum, uint64_t Count, CallBase *I, - const char *&Reason); - bool inlineCallInstruction(CallBase &CB, - const FunctionSamples *CalleeSamples); + // Attempt to promote indirect call and also inline the promoted call + bool tryPromoteAndInlineCandidate( + Function &F, InlineCandidate &Candidate, uint64_t &Sum, + DenseSet &PromotedInsns, + SmallVector *InlinedCallSites = nullptr); bool inlineHotFunctions(Function &F, DenseSet &InlinedGUIDs); - // Helper functions call-site prioritized BFS inliner - // Will change the main FDO inliner to be work list based directly in - // upstream, then merge this change with that and remove the duplication. InlineCost shouldInlineCandidate(InlineCandidate &Candidate); bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB); - bool tryInlineCandidate(InlineCandidate &Candidate, - SmallVector &InlinedCallSites); + bool + tryInlineCandidate(InlineCandidate &Candidate, + SmallVector *InlinedCallSites = nullptr); bool inlineHotFunctionsWithPriority(Function &F, DenseSet &InlinedGUIDs); @@ -1077,70 +1075,45 @@ return it.first->second; } -CallBase * -SampleProfileLoader::tryPromoteIndirectCall(Function &F, StringRef CalleeName, - uint64_t &Sum, uint64_t Count, - CallBase *I, const char *&Reason) { - Reason = "Callee function not available"; +/// Attempt to promote indirect call and also inline the promoted call. +/// +/// \param F Caller function. +/// \param Candidate ICP and inline candidate. +/// \param Sum Sum of target counts for indirect call. +/// \param PromotedInsns Map to keep track of indirect call already processed. +/// \param Candidate ICP and inline candidate. +/// \param InlinedCallSite Output vector for new call sites exposed after +/// inlining. +bool SampleProfileLoader::tryPromoteAndInlineCandidate( + Function &F, InlineCandidate &Candidate, uint64_t &Sum, + DenseSet &PromotedInsns, + SmallVector *InlinedCallSite) { + const char *Reason = "Callee function not available"; // R->getValue() != &F is to prevent promoting a recursive call. // If it is a recursive call, we do not inline it as it could bloat // the code exponentially. There is way to better handle this, e.g. // clone the caller first, and inline the cloned caller if it is // recursive. As llvm does not inline recursive calls, we will // simply ignore it instead of handling it explicitly. - auto R = SymbolMap.find(CalleeName); + auto R = SymbolMap.find(Candidate.CalleeSamples->getFuncName()); if (R != SymbolMap.end() && R->getValue() && !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() && R->getValue()->hasFnAttribute("use-sample-profile") && - R->getValue() != &F && isLegalToPromote(*I, R->getValue(), &Reason)) { + R->getValue() != &F && + isLegalToPromote(*Candidate.CallInstr, R->getValue(), &Reason)) { auto *DI = - &pgo::promoteIndirectCall(*I, R->getValue(), Count, Sum, false, ORE); - Sum -= Count; - return DI; - } - return nullptr; -} - -bool SampleProfileLoader::inlineCallInstruction( - CallBase &CB, const FunctionSamples *CalleeSamples) { - if (ExternalInlineAdvisor) { - auto Advice = ExternalInlineAdvisor->getAdvice(CB); - if (!Advice->isInliningRecommended()) { - Advice->recordUnattemptedInlining(); - return false; + &pgo::promoteIndirectCall(*Candidate.CallInstr, R->getValue(), + Candidate.CallsiteCount, Sum, false, ORE); + if (DI) { + Sum -= Candidate.CallsiteCount; + PromotedInsns.insert(Candidate.CallInstr); + Candidate.CallInstr = DI; + if (isa(DI) || isa(DI)) + return tryInlineCandidate(Candidate, InlinedCallSite); } - // Dummy record, we don't use it for replay. - Advice->recordInlining(); - } - - Function *CalledFunction = CB.getCalledFunction(); - assert(CalledFunction); - DebugLoc DLoc = CB.getDebugLoc(); - BasicBlock *BB = CB.getParent(); - InlineParams Params = getInlineParams(); - Params.ComputeFullInlineCost = true; - // Checks if there is anything in the reachable portion of the callee at - // this callsite that makes this inlining potentially illegal. Need to - // set ComputeFullInlineCost, otherwise getInlineCost may return early - // when cost exceeds threshold without checking all IRs in the callee. - // The acutal cost does not matter because we only checks isNever() to - // see if it is legal to inline the callsite. - InlineCost Cost = - getInlineCost(CB, Params, GetTTI(*CalledFunction), GetAC, GetTLI); - if (Cost.isNever()) { - ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB) - << "incompatible inlining"); - return false; - } - InlineFunctionInfo IFI(nullptr, GetAC); - if (InlineFunction(CB, IFI).isSuccess()) { - // The call to InlineFunction erases I, so we can't pass it here. - emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost, - true, CSINLINE_DEBUG); - if (ProfileIsCS) - ContextTracker->markContextSamplesInlined(CalleeSamples); - ++NumCSInlined; - return true; + } else { + LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to " + << CalleeFunctionName << " because " << Reason << "\n"); } return false; } @@ -1206,10 +1179,11 @@ "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled"); - DenseMap localNotInlinedCallSites; + DenseMap LocalNotInlinedCallSites; bool Changed = false; - while (true) { - bool LocalChanged = false; + bool LocalChanged = true; + while (LocalChanged) { + LocalChanged = false; SmallVector CIS; for (auto &BB : F) { bool Hot = false; @@ -1223,7 +1197,7 @@ "GUIDToFuncNameMap has to be populated"); AllCandidates.push_back(CB); if (FS->getEntrySamples() > 0 || ProfileIsCS) - localNotInlinedCallSites.try_emplace(CB, FS); + LocalNotInlinedCallSites.try_emplace(CB, FS); if (callsiteIsHot(FS, PSI)) Hot = true; else if (shouldInlineColdCallee(*CB)) @@ -1241,6 +1215,11 @@ } for (CallBase *I : CIS) { Function *CalledFunction = I->getCalledFunction(); + InlineCandidate Candidate = {I, + LocalNotInlinedCallSites.count(I) + ? LocalNotInlinedCallSites[I] + : nullptr, + 0 /* dummy count */}; // Do not inline recursive calls. if (CalledFunction == &F) continue; @@ -1257,30 +1236,16 @@ if (!callsiteIsHot(FS, PSI)) continue; - const char *Reason = nullptr; - auto CalleeFunctionName = FS->getFuncName(); - if (CallBase *DI = - tryPromoteIndirectCall(F, CalleeFunctionName, Sum, - FS->getEntrySamples(), I, Reason)) { - PromotedInsns.insert(I); - // If profile mismatches, we should not attempt to inline DI. - if ((isa(DI) || isa(DI)) && - inlineCallInstruction(cast(*DI), FS)) { - localNotInlinedCallSites.erase(I); - LocalChanged = true; - } - } else { - LLVM_DEBUG(dbgs() - << "\nFailed to promote indirect call to " - << CalleeFunctionName << " because " << Reason << "\n"); + Candidate = {I, FS, FS->getEntrySamples()}; + if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns)) { + LocalNotInlinedCallSites.erase(I); + LocalChanged = true; } } } else if (CalledFunction && CalledFunction->getSubprogram() && !CalledFunction->isDeclaration()) { - if (inlineCallInstruction(*I, localNotInlinedCallSites.count(I) - ? localNotInlinedCallSites[I] - : nullptr)) { - localNotInlinedCallSites.erase(I); + if (tryInlineCandidate(Candidate)) { + LocalNotInlinedCallSites.erase(I); LocalChanged = true; } } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { @@ -1288,11 +1253,7 @@ InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold()); } } - if (LocalChanged) { - Changed = true; - } else { - break; - } + Changed |= LocalChanged; } // For CS profile, profile for not inlined context will be merged when @@ -1301,7 +1262,7 @@ return Changed; // Accumulate not inlined callsite information into notInlinedSamples - for (const auto &Pair : localNotInlinedCallSites) { + for (const auto &Pair : LocalNotInlinedCallSites) { CallBase *I = Pair.getFirst(); Function *Callee = I->getCalledFunction(); if (!Callee || Callee->isDeclaration()) @@ -1347,7 +1308,7 @@ } bool SampleProfileLoader::tryInlineCandidate( - InlineCandidate &Candidate, SmallVector &InlinedCallSites) { + InlineCandidate &Candidate, SmallVector *InlinedCallSites) { CallBase &CB = *Candidate.CallInstr; Function *CalledFunction = CB.getCalledFunction(); @@ -1372,9 +1333,11 @@ true, CSINLINE_DEBUG); // Now populate the list of newly exposed call sites. - InlinedCallSites.clear(); - for (auto &I : IFI.InlinedCallSites) - InlinedCallSites.push_back(I); + if (InlinedCallSites) { + InlinedCallSites->clear(); + for (auto &I : IFI.InlinedCallSites) + InlinedCallSites->push_back(I); + } if (ProfileIsCS) ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples); @@ -1446,18 +1409,16 @@ InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params, GetTTI(*Callee), GetAC, GetTLI); + // Honor always inline and never inline from call analyzer + if (Cost.isNever() || Cost.isAlways()) + return Cost; + // For old FDO inliner, we inline the call site as long as cost is not // "Never". The cost-benefit check is done earlier. if (!CallsitePrioritizedInline) { - if (Cost.isNever()) - return Cost; - return InlineCost::getAlways("hot callsite previously inlined"); + return InlineCost::get(Cost.getCost(), INT_MAX); } - // Honor always inline and never inline from call analyzer - if (Cost.isNever() || Cost.isAlways()) - return Cost; - // Otherwise only use the cost from call analyzer, but overwite threshold with // Sample PGO threshold. return InlineCost::get(Cost.getCost(), SampleThreshold); @@ -1542,34 +1503,23 @@ // fixed, but we generate different types). if (!PSI->isHotCount(EntryCountDistributed)) break; - const char *Reason = nullptr; - auto CalleeFunctionName = FS->getFuncName(); - if (CallBase *DI = tryPromoteIndirectCall( - F, CalleeFunctionName, Sum, EntryCountDistributed, I, Reason)) { - // Attach function profile for promoted indirect callee, and update - // call site count for the promoted inline candidate too. - Candidate = {DI, FS, EntryCountDistributed}; - PromotedInsns.insert(I); - SmallVector InlinedCallSites; - // If profile mismatches, we should not attempt to inline DI. - if ((isa(DI) || isa(DI)) && - tryInlineCandidate(Candidate, InlinedCallSites)) { - for (auto *CB : InlinedCallSites) { - if (getInlineCandidate(&NewCandidate, CB)) - CQueue.emplace(NewCandidate); - } - Changed = true; + SmallVector InlinedCallSites; + // Attach function profile for promoted indirect callee, and update + // call site count for the promoted inline candidate too. + Candidate = {I, FS, EntryCountDistributed}; + if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns, + &InlinedCallSites)) { + for (auto *CB : InlinedCallSites) { + if (getInlineCandidate(&NewCandidate, CB)) + CQueue.emplace(NewCandidate); } - } else { - LLVM_DEBUG(dbgs() - << "\nFailed to promote indirect call to " - << CalleeFunctionName << " because " << Reason << "\n"); + Changed = true; } } } else if (CalledFunction && CalledFunction->getSubprogram() && !CalledFunction->isDeclaration()) { SmallVector InlinedCallSites; - if (tryInlineCandidate(Candidate, InlinedCallSites)) { + if (tryInlineCandidate(Candidate, &InlinedCallSites)) { for (auto *CB : InlinedCallSites) { if (getInlineCandidate(&NewCandidate, CB)) CQueue.emplace(NewCandidate); diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll @@ -89,7 +89,7 @@ ;YAML-NEXT: - String: '(cost=' ;YAML-NEXT: - Cost: '15' ;YAML-NEXT: - String: ', threshold=' -;YAML-NEXT: - Threshold: '225' +;YAML-NEXT: - Threshold: '2147483647' ;YAML-NEXT: - String: ')' ;YAML-NEXT: - String: ' at callsite ' ;YAML-NEXT: - String: foo diff --git a/llvm/test/Transforms/SampleProfile/remarks.ll b/llvm/test/Transforms/SampleProfile/remarks.ll --- a/llvm/test/Transforms/SampleProfile/remarks.ll +++ b/llvm/test/Transforms/SampleProfile/remarks.ll @@ -21,7 +21,7 @@ ; We are expecting foo() to be inlined in main() (almost all the cycles are ; spent inside foo). -; CHECK: remark: remarks.cc:13:21: _Z3foov inlined into main to match profiling context with (cost=130, threshold=225) at callsite main:0:21; +; CHECK: remark: remarks.cc:13:21: _Z3foov inlined into main to match profiling context with (cost=130, threshold=2147483647) at callsite main:0:21; ; CHECK: remark: remarks.cc:9:19: rand inlined into main to match profiling context with (cost=always): always inline attribute at callsite _Z3foov:6:19 @ main:0:21; ; The back edge for the loop is the hottest edge in the loop subgraph. @@ -47,7 +47,7 @@ ;YAML-NEXT: - String: '(cost=' ;YAML-NEXT: - Cost: '130' ;YAML-NEXT: - String: ', threshold=' -;YAML-NEXT: - Threshold: '225' +;YAML-NEXT: - Threshold: '2147483647' ;YAML-NEXT: - String: ')' ;YAML-NEXT: - String: ' at callsite ' ;YAML-NEXT: - String: main