Index: include/llvm/ProfileData/SampleProf.h =================================================================== --- include/llvm/ProfileData/SampleProf.h +++ include/llvm/ProfileData/SampleProf.h @@ -339,6 +339,16 @@ Function *F = M->getFunction(Name); if (!F || !F->getSubprogram()) S.insert(Function::getGUID(Name)); + // Import hot callsites that are not inlined in the profile. + // We need this because we may not be able to import and inline FS to + // expose the callsite as IR in the ThinLTO prelink phase. + for (const auto &BS : BodySamples) + for (const auto &TS : BS.second.getCallTargets()) + if (TS.getValue() > Threshold) { + Function *Callee = M->getFunction(TS.getKey()); + if (!Callee || !Callee->getSubprogram()) + S.insert(Function::getGUID(TS.getKey())); + } for (auto CS : CallsiteSamples) for (const auto &NameFS : CS.second) NameFS.second.findImportedFunctions(S, M, Threshold); Index: include/llvm/Transforms/SampleProfile.h =================================================================== --- include/llvm/Transforms/SampleProfile.h +++ include/llvm/Transforms/SampleProfile.h @@ -21,10 +21,12 @@ class SampleProfileLoaderPass : public PassInfoMixin { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); - SampleProfileLoaderPass(std::string File = "") : ProfileFileName(File) {} + SampleProfileLoaderPass(std::string File = "", bool IsThinLTOPreLink = false) + : ProfileFileName(File), IsThinLTOPreLink(IsThinLTOPreLink) {} private: std::string ProfileFileName; + bool IsThinLTOPreLink; }; } // End llvm namespace Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -555,7 +555,8 @@ if (PGOOpt && !PGOOpt->SampleProfileFile.empty()) { // Annotate sample profile right after early FPM to ensure freshness of // the debug info. 
- MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile)); + MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile, + Phase == ThinLTOPhase::PreLink)); // Do not invoke ICP in the ThinLTOPrelink phase as it makes it hard // for the profile annotation to be accurate in the ThinLTO backend. if (Phase != ThinLTOPhase::PreLink) Index: lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- lib/Transforms/IPO/SampleProfile.cpp +++ lib/Transforms/IPO/SampleProfile.cpp @@ -149,13 +149,14 @@ class SampleProfileLoader { public: SampleProfileLoader( - StringRef Name, + StringRef Name, bool IsThinLTOPreLink, std::function GetAssumptionCache, std::function GetTargetTransformInfo) : DT(nullptr), PDT(nullptr), LI(nullptr), GetAC(GetAssumptionCache), GetTTI(GetTargetTransformInfo), Reader(), Samples(nullptr), - Filename(Name), ProfileIsValid(false), TotalCollectedSamples(0), - ORE(nullptr) {} + Filename(Name), ProfileIsValid(false), + IsThinLTOPreLink(IsThinLTOPreLink), + TotalCollectedSamples(0), ORE(nullptr) {} bool doInitialization(Module &M); bool runOnModule(Module &M, ModuleAnalysisManager *AM); @@ -252,6 +253,12 @@ /// \brief Flag indicating whether the profile input loaded successfully. bool ProfileIsValid; + /// \brief Flag indicating if the pass is invoked in ThinLTO compile phase. + /// + /// In this phase, during annotation, we should not promote indirect calls. + /// Instead, we will mark GUIDs that need to be annotated to the function. + bool IsThinLTOPreLink; + /// \brief Total number of samples collected in this profile. 
/// /// This is the sum of all the samples collected in all the functions executed @@ -267,8 +274,9 @@ // Class identification, replacement for typeinfo static char ID; - SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile) - : ModulePass(ID), SampleLoader(Name, + SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile, + bool IsThinLTOPreLink = false) + : ModulePass(ID), SampleLoader(Name, IsThinLTOPreLink, [&](Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }, @@ -755,33 +763,40 @@ if (PromotedInsns.count(I)) continue; for (const auto *FS : findIndirectCallFunctionSamples(*I)) { - auto CalleeFunctionName = FS->getName(); - // If it is a recursive call, we do not inline it as it could bloat - // the code exponentially. There is way to better handle this, e.g. - // clone the caller first, and inline the cloned caller if it is - // recursive. As llvm does not inline recursive calls, we will - // simply ignore it instead of handling it explicitly. - if (CalleeFunctionName == F.getName()) - continue; - - const char *Reason = "Callee function not available"; - auto R = SymbolMap.find(CalleeFunctionName); - if (R != SymbolMap.end() && R->getValue() && - !R->getValue()->isDeclaration() && - R->getValue()->getSubprogram() && - isLegalToPromote(I, R->getValue(), &Reason)) { - // The indirect target was promoted and inlined in the profile, - // as a result, we do not have profile info for the branch - // probability. We set the probability to 80% taken to indicate - // that the static call is likely taken. - Instruction *DI = dyn_cast( - promoteIndirectCall(I, R->getValue(), 80, 100, false, ORE) - ->stripPointerCasts()); - PromotedInsns.insert(I); - // If profile mismatches, we should not attempt to inline DI. 
- if ((isa(DI) || isa(DI)) && - inlineCallInstruction(DI)) - LocalChanged = true; + if (!IsThinLTOPreLink) { + auto CalleeFunctionName = FS->getName(); + // If it is a recursive call, we do not inline it as it could bloat + // the code exponentially. There is way to better handle this, e.g. + // clone the caller first, and inline the cloned caller if it is + // recursive. As llvm does not inline recursive calls, we will + // simply ignore it instead of handling it explicitly. + if (CalleeFunctionName == F.getName()) + continue; + + const char *Reason = "Callee function not available"; + auto R = SymbolMap.find(CalleeFunctionName); + if (R != SymbolMap.end() && R->getValue() && + !R->getValue()->isDeclaration() && + R->getValue()->getSubprogram() && + isLegalToPromote(I, R->getValue(), &Reason)) { + // The indirect target was promoted and inlined in the profile, + // as a result, we do not have profile info for the branch + // probability. We set the probability to 80% taken to indicate + // that the static call is likely taken. + Instruction *DI = dyn_cast( + promoteIndirectCall(I, R->getValue(), 80, 100, false, ORE) + ->stripPointerCasts()); + PromotedInsns.insert(I); + // If profile mismatches, we should not attempt to inline DI. + if ((isa(DI) || isa(DI)) && + inlineCallInstruction(DI)) + LocalChanged = true; + } else { + DEBUG(dbgs() + << "\nFailed to promote indirect call to " + << CalleeFunctionName << " because " << Reason << "\n"); + continue; + } } else { FS->findImportedFunctions(ImportGUIDs, F.getParent(), Samples->getTotalSamples() * @@ -792,7 +807,7 @@ !CalledFunction->isDeclaration()) { if (inlineCallInstruction(I)) LocalChanged = true; - } else { + } else if (IsThinLTOPreLink) { findCalleeFunctionSamples(*I)->findImportedFunctions( ImportGUIDs, F.getParent(), Samples->getTotalSamples() * SampleProfileHotThreshold / 100); @@ -1558,9 +1573,9 @@ return FAM.getResult(F); }; - SampleProfileLoader SampleLoader(ProfileFileName.empty() ? 
SampleProfileFile - : ProfileFileName, - GetAssumptionCache, GetTTI); + SampleProfileLoader SampleLoader( + ProfileFileName.empty() ? SampleProfileFile : ProfileFileName, + IsThinLTOPreLink, GetAssumptionCache, GetTTI); SampleLoader.doInitialization(M); Index: test/Transforms/SampleProfile/Inputs/import.prof =================================================================== --- test/Transforms/SampleProfile/Inputs/import.prof +++ test/Transforms/SampleProfile/Inputs/import.prof @@ -5,4 +5,4 @@ 4: foo1:1000 1: 1000 4: foo2:1000 - 1: 1000 + 1: 1000 foo3:1000 Index: test/Transforms/SampleProfile/import.ll =================================================================== --- test/Transforms/SampleProfile/import.ll +++ test/Transforms/SampleProfile/import.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/import.prof -S | FileCheck %s +; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=new-pm-pgo-sample-use-pipeline -profile-file=%S/Inputs/import.prof -S | FileCheck %s ; Tests whether the functions in the inline stack are added to the ; function_entry_count metadata. @@ -15,9 +15,9 @@ ret void } -; GUIDs of foo, bar, foo1 and foo2 should be included in the metadata to make -; sure hot inline stacks are imported. -; CHECK: !{!"function_entry_count", i64 1, i64 2494702099028631698, i64 6699318081062747564, i64 7682762345278052905, i64 -2012135647395072713} +; GUIDs of foo, bar, foo1, foo2 and foo3 should be included in the metadata to +; make sure hot inline stacks are imported. +; CHECK: !{!"function_entry_count", i64 1, i64 2494702099028631698, i64 6699318081062747564, i64 7682762345278052905, i64 -7908226060800700466, i64 -2012135647395072713} !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!8, !9}