Index: include/llvm/ProfileData/SampleProf.h =================================================================== --- include/llvm/ProfileData/SampleProf.h +++ include/llvm/ProfileData/SampleProf.h @@ -339,6 +339,13 @@ Function *F = M->getFunction(Name); if (!F || !F->getSubprogram()) S.insert(Function::getGUID(Name)); + for (const auto &BS : BodySamples) + for (const auto &TS : BS.second.getCallTargets()) + if (TS.getValue() > Threshold) { + Function *Callee = M->getFunction(TS.getKey()); + if (!Callee || !Callee->getSubprogram()) + S.insert(Function::getGUID(TS.getKey())); + } for (auto CS : CallsiteSamples) for (const auto &NameFS : CS.second) NameFS.second.findImportedFunctions(S, M, Threshold); Index: include/llvm/Transforms/SampleProfile.h =================================================================== --- include/llvm/Transforms/SampleProfile.h +++ include/llvm/Transforms/SampleProfile.h @@ -21,10 +21,12 @@ class SampleProfileLoaderPass : public PassInfoMixin { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); - SampleProfileLoaderPass(std::string File = "") : ProfileFileName(File) {} + SampleProfileLoaderPass(std::string File = "", bool InThinLTOCompile = false) + : ProfileFileName(File), InThinLTOCompile(InThinLTOCompile) {} private: std::string ProfileFileName; + bool InThinLTOCompile; }; } // End llvm namespace Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -555,7 +555,8 @@ if (PGOOpt && !PGOOpt->SampleProfileFile.empty()) { // Annotate sample profile right after early FPM to ensure freshness of // the debug info. - MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile)); + MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile, + Phase == ThinLTOPhase::PreLink)); // Do not invoke ICP in the ThinLTOPrelink phase as it makes it hard // for the profile annotation to be accurate in the ThinLTO backend. 
if (Phase != ThinLTOPhase::PreLink) Index: lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- lib/Transforms/IPO/SampleProfile.cpp +++ lib/Transforms/IPO/SampleProfile.cpp @@ -86,6 +86,10 @@ "sample-profile-inline-hot-threshold", cl::init(0.1), cl::value_desc("N"), cl::desc("Inlined functions that account for more than N% of all samples " "collected in the parent function, will be inlined again.")); +static cl::opt SampleProfileInThinLTOCompile( + "sample-profile-in-thinlto-compile", cl::init(false), cl::value_desc("N"), + cl::desc("Whether the pass is invoked in ThinLTO compile phase. This is " + "for testing purpose only.")); namespace { typedef DenseMap BlockWeightMap; @@ -149,13 +153,14 @@ class SampleProfileLoader { public: SampleProfileLoader( - StringRef Name, + StringRef Name, bool InThinLTOCompile, std::function GetAssumptionCache, std::function GetTargetTransformInfo) : DT(nullptr), PDT(nullptr), LI(nullptr), GetAC(GetAssumptionCache), GetTTI(GetTargetTransformInfo), Reader(), Samples(nullptr), - Filename(Name), ProfileIsValid(false), TotalCollectedSamples(0), - ORE(nullptr) {} + Filename(Name), ProfileIsValid(false), + InThinLTOCompile(InThinLTOCompile || SampleProfileInThinLTOCompile), + TotalCollectedSamples(0), ORE(nullptr) {} bool doInitialization(Module &M); bool runOnModule(Module &M, ModuleAnalysisManager *AM); @@ -172,6 +177,7 @@ std::vector findIndirectCallFunctionSamples(const Instruction &I) const; const FunctionSamples *findFunctionSamples(const Instruction &I) const; + bool inlineCallInstruction(Instruction *I); bool inlineHotFunctions(Function &F, DenseSet &ImportGUIDs); void printEdgeWeight(raw_ostream &OS, Edge E); @@ -251,6 +257,12 @@ /// \brief Flag indicating whether the profile input loaded successfully. bool ProfileIsValid; + /// \brief Flag indicating if the pass is invoked in ThinLTO compile phase. 
+ /// + /// In this phase, in annotation, we should not promote indirect calls. + /// Instead, we will mark GUIDs that need to be annotated to the function. + bool InThinLTOCompile; + /// \brief Total number of samples collected in this profile. /// /// This is the sum of all the samples collected in all the functions executed @@ -266,8 +278,9 @@ // Class identification, replacement for typeinfo static char ID; - SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile) - : ModulePass(ID), SampleLoader(Name, + SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile, + bool InThinLTOCompile = false) + : ModulePass(ID), SampleLoader(Name, InThinLTOCompile, [&](Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }, @@ -676,6 +689,39 @@ return FS; } +bool SampleProfileLoader::inlineCallInstruction(Instruction *I) { + assert(isa(I) || isa(I)); + CallSite CS(I); + Function *CalledFunction = CS.getCalledFunction(); + assert(CalledFunction); + DebugLoc DLoc = I->getDebugLoc(); + BasicBlock *BB = I->getParent(); + InlineParams Params = getInlineParams(); + Params.ComputeFullInlineCost = true; + // Checks if there is anything in the reachable portion of the callee at + // this callsite that makes this inlining potentially illegal. Need to + // set ComputeFullInlineCost, otherwise getInlineCost may return early + // when cost exceeds threshold without checking all IRs in the callee. + // The actual cost does not matter because we only check isNever() to + // see if it is legal to inline the callsite. + InlineCost Cost = getInlineCost(CS, Params, GetTTI(*CalledFunction), GetAC, + None, nullptr, nullptr); + if (Cost.isNever()) { + ORE->emit(OptimizationRemark(DEBUG_TYPE, "Not inline", DLoc, BB) + << "incompatible inlining"); + return false; + } + InlineFunctionInfo IFI(nullptr, &GetAC); + if (InlineFunction(CS, IFI)) { + // The call to InlineFunction erases I, so we can't pass it here. 
+ ORE->emit(OptimizationRemark(DEBUG_TYPE, "HotInline", DLoc, BB) + << "inlined hot callee '" << ore::NV("Callee", CalledFunction) + << "' into '" << ore::NV("Caller", BB->getParent()) << "'"); + return true; + } + return false; +} + /// \brief Iteratively inline hot callsites of a function. /// /// Iteratively traverse all callsites of the function \p F, and find if @@ -713,82 +759,62 @@ } } for (auto I : CIS) { - InlineFunctionInfo IFI(nullptr, &GetAC); Function *CalledFunction = CallSite(I).getCalledFunction(); // Do not inline recursive calls. if (CalledFunction == &F) continue; - Instruction *DI = I; - if (!CalledFunction && !PromotedInsns.count(I) && - CallSite(I).isIndirectCall()) { + if (CallSite(I).isIndirectCall()) { + if (PromotedInsns.count(I)) + continue; for (const auto *FS : findIndirectCallFunctionSamples(*I)) { - auto CalleeFunctionName = FS->getName(); - // If it is a recursive call, we do not inline it as it could bloat - // the code exponentially. There is way to better handle this, e.g. - // clone the caller first, and inline the cloned caller if it is - // recursive. As llvm does not inline recursive calls, we will simply - // ignore it instead of handling it explicitly. - if (CalleeFunctionName == F.getName()) - continue; - const char *Reason = "Callee function not available"; - auto R = SymbolMap.find(CalleeFunctionName); - if (R == SymbolMap.end()) - continue; - CalledFunction = R->getValue(); - if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) { - // The indirect target was promoted and inlined in the profile, as a - // result, we do not have profile info for the branch probability. - // We set the probability to 80% taken to indicate that the static - // call is likely taken. 
- DI = dyn_cast( promoteIndirectCall(I, CalledFunction, 80, 100, false, ORE) - ->stripPointerCasts()); - PromotedInsns.insert(I); + if (!InThinLTOCompile) { + auto CalleeFunctionName = FS->getName(); + // If it is a recursive call, we do not inline it as it could bloat + // the code exponentially. There is a way to better handle this, e.g. + // clone the caller first, and inline the cloned caller if it is + // recursive. As llvm does not inline recursive calls, we will + // simply ignore it instead of handling it explicitly. + if (CalleeFunctionName == F.getName()) + continue; + + const char *Reason = "Callee function not available"; + auto R = SymbolMap.find(CalleeFunctionName); + if (R != SymbolMap.end() && R->getValue() && + !R->getValue()->isDeclaration() && + R->getValue()->getSubprogram() && + isLegalToPromote(I, R->getValue(), &Reason)) { + // The indirect target was promoted and inlined in the profile, + // as a result, we do not have profile info for the branch + // probability. We set the probability to 80% taken to indicate + // that the static call is likely taken. + Instruction *DI = dyn_cast( + promoteIndirectCall(I, R->getValue(), 80, 100, false, ORE) + ->stripPointerCasts()); + PromotedInsns.insert(I); + // If profile mismatches, we should not attempt to inline DI. + if ((isa(DI) || isa(DI)) && + inlineCallInstruction(DI)) + LocalChanged = true; + } else { + DEBUG(dbgs() + << "\nFailed to promote indirect call to " + << CalleeFunctionName << " because " << Reason << "\n"); + continue; + } } else { - DEBUG(dbgs() << "\nFailed to promote indirect call to " - << CalleeFunctionName << " because " << Reason - << "\n"); - continue; + FS->findImportedFunctions(ImportGUIDs, F.getParent(), + Samples->getTotalSamples() * + SampleProfileHotThreshold / 100); } } - // If there is profile mismatch, we should not attempt to inline DI. 
- if (!isa(DI) && !isa(DI)) - continue; - } - if (!CalledFunction || !CalledFunction->getSubprogram()) { - // Handles functions that are imported from other modules. - for (const FunctionSamples *FS : findIndirectCallFunctionSamples(*I)) - FS->findImportedFunctions( - ImportGUIDs, F.getParent(), - Samples->getTotalSamples() * SampleProfileHotThreshold / 100); - continue; - } - assert(isa(DI) || isa(DI)); - CallSite CS(DI); - DebugLoc DLoc = I->getDebugLoc(); - BasicBlock *BB = I->getParent(); - InlineParams Params = getInlineParams(); - Params.ComputeFullInlineCost = true; - // Checks if there is anything in the reachable portion of the callee at - // this callsite that makes this inlining potentially illegal. Need to - // set ComputeFullInlineCost, otherwise getInlineCost may return early - // when cost exceeds threshold without checking all IRs in the callee. - // The acutal cost does not matter because we only checks isNever() to - // see if it is legal to inline the callsite. - InlineCost Cost = getInlineCost(CS, Params, GetTTI(*CalledFunction), GetAC, - None, nullptr, nullptr); - if (Cost.isNever()) { - ORE->emit(OptimizationRemark(DEBUG_TYPE, "Not inline", DLoc, BB) - << "incompatible inlining"); - continue; - } - if (InlineFunction(CS, IFI)) { - LocalChanged = true; - // The call to InlineFunction erases DI, so we can't pass it here. 
- ORE->emit(OptimizationRemark(DEBUG_TYPE, "HotInline", DLoc, BB) - << "inlined hot callee '" - << ore::NV("Callee", CalledFunction) << "' into '" - << ore::NV("Caller", &F) << "'"); + } else if (CalledFunction && CalledFunction->getSubprogram() && + !CalledFunction->isDeclaration()) { + if (inlineCallInstruction(I)) + LocalChanged = true; + } else if (InThinLTOCompile) { + findCalleeFunctionSamples(*I)->findImportedFunctions( + ImportGUIDs, F.getParent(), + Samples->getTotalSamples() * SampleProfileHotThreshold / 100); } } if (LocalChanged) { @@ -1551,9 +1577,9 @@ return FAM.getResult(F); }; - SampleProfileLoader SampleLoader(ProfileFileName.empty() ? SampleProfileFile - : ProfileFileName, - GetAssumptionCache, GetTTI); + SampleProfileLoader SampleLoader( + ProfileFileName.empty() ? SampleProfileFile : ProfileFileName, + InThinLTOCompile, GetAssumptionCache, GetTTI); SampleLoader.doInitialization(M); Index: test/Transforms/SampleProfile/Inputs/import.prof =================================================================== --- test/Transforms/SampleProfile/Inputs/import.prof +++ test/Transforms/SampleProfile/Inputs/import.prof @@ -5,4 +5,4 @@ 4: foo1:1000 1: 1000 4: foo2:1000 - 1: 1000 + 1: 1000 foo3:1000 Index: test/Transforms/SampleProfile/import.ll =================================================================== --- test/Transforms/SampleProfile/import.ll +++ test/Transforms/SampleProfile/import.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/import.prof -S | FileCheck %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/import.prof -sample-profile-in-thinlto-compile -S | FileCheck %s ; Tests whether the functions in the inline stack are added to the ; function_entry_count metadata. @@ -15,9 +15,9 @@ ret void } -; GUIDs of foo, bar, foo1 and foo2 should be included in the metadata to make -; sure hot inline stacks are imported. 
-; CHECK: !{!"function_entry_count", i64 1, i64 2494702099028631698, i64 6699318081062747564, i64 7682762345278052905, i64 -2012135647395072713} +; GUIDs of foo, bar, foo1, foo2 and foo3 should be included in the metadata to +; make sure hot inline stacks are imported. +; CHECK: !{!"function_entry_count", i64 1, i64 2494702099028631698, i64 6699318081062747564, i64 7682762345278052905, i64 -7908226060800700466, i64 -2012135647395072713} !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!8, !9}