Index: include/llvm/ProfileData/SampleProf.h =================================================================== --- include/llvm/ProfileData/SampleProf.h +++ include/llvm/ProfileData/SampleProf.h @@ -331,7 +331,8 @@ /// Recursively traverses all children, if the corresponding function is /// not defined in module \p M, and its total sample is no less than - /// \p Threshold, add its corresponding GUID to \p S. + /// \p Threshold, add its corresponding GUID to \p S. Also traverse the + /// BodySamples to add hot CallTarget's GUID to \p S. void findImportedFunctions(DenseSet &S, const Module *M, uint64_t Threshold) const { if (TotalSamples <= Threshold) @@ -339,6 +340,15 @@ Function *F = M->getFunction(Name); if (!F || !F->getSubprogram()) S.insert(Function::getGUID(Name)); + // Import hot CallTargets, which may not be available in IR because full + // profile annotation cannot be done until backend compilation in ThinLTO. + for (const auto &BS : BodySamples) + for (const auto &TS : BS.second.getCallTargets()) + if (TS.getValue() > Threshold) { + Function *Callee = M->getFunction(TS.getKey()); + if (!Callee || !Callee->getSubprogram()) + S.insert(Function::getGUID(TS.getKey())); + } for (auto CS : CallsiteSamples) for (const auto &NameFS : CS.second) NameFS.second.findImportedFunctions(S, M, Threshold); Index: include/llvm/Transforms/SampleProfile.h =================================================================== --- include/llvm/Transforms/SampleProfile.h +++ include/llvm/Transforms/SampleProfile.h @@ -21,10 +21,12 @@ class SampleProfileLoaderPass : public PassInfoMixin { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); - SampleProfileLoaderPass(std::string File = "") : ProfileFileName(File) {} + SampleProfileLoaderPass(std::string File = "", bool IsThinLTOPreLink = false) + : ProfileFileName(File), IsThinLTOPreLink(IsThinLTOPreLink) {} private: std::string ProfileFileName; + bool IsThinLTOPreLink; }; } // End llvm namespace Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -555,7 +555,8 @@ if (PGOOpt && !PGOOpt->SampleProfileFile.empty()) { // Annotate sample profile right after early FPM to ensure freshness of // the debug info. - MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile)); + MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile, + Phase == ThinLTOPhase::PreLink)); // Do not invoke ICP in the ThinLTOPrelink phase as it makes it hard // for the profile annotation to be accurate in the ThinLTO backend. if (Phase != ThinLTOPhase::PreLink) Index: lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- lib/Transforms/IPO/SampleProfile.cpp +++ lib/Transforms/IPO/SampleProfile.cpp @@ -149,13 +149,14 @@ class SampleProfileLoader { public: SampleProfileLoader( - StringRef Name, + StringRef Name, bool IsThinLTOPreLink, std::function GetAssumptionCache, std::function GetTargetTransformInfo) : DT(nullptr), PDT(nullptr), LI(nullptr), GetAC(GetAssumptionCache), GetTTI(GetTargetTransformInfo), Reader(), Samples(nullptr), - Filename(Name), ProfileIsValid(false), TotalCollectedSamples(0), - ORE(nullptr) {} + Filename(Name), ProfileIsValid(false), + IsThinLTOPreLink(IsThinLTOPreLink), + TotalCollectedSamples(0), ORE(nullptr) {} bool doInitialization(Module &M); bool runOnModule(Module &M, ModuleAnalysisManager *AM); @@ -252,6 +253,12 @@ /// \brief Flag indicating whether the profile input loaded successfully. bool ProfileIsValid; + /// \brief Flag indicating if the pass is invoked in ThinLTO compile phase. + /// + /// In this phase, in annotation, we should not promote indirect calls. + /// Instead, we will mark GUIDs that needs to be annotated to the function. + bool IsThinLTOPreLink; + /// \brief Total number of samples collected in this profile. /// /// This is the sum of all the samples collected in all the functions executed @@ -267,8 +274,9 @@ // Class identification, replacement for typeinfo static char ID; - SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile) - : ModulePass(ID), SampleLoader(Name, + SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile, + bool IsThinLTOPreLink = false) + : ModulePass(ID), SampleLoader(Name, IsThinLTOPreLink, [&](Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }, @@ -755,6 +763,12 @@ if (PromotedInsns.count(I)) continue; for (const auto *FS : findIndirectCallFunctionSamples(*I)) { + if (IsThinLTOPreLink) { + FS->findImportedFunctions(ImportGUIDs, F.getParent(), + Samples->getTotalSamples() * + SampleProfileHotThreshold / 100); + continue; + } auto CalleeFunctionName = FS->getName(); // If it is a recursive call, we do not inline it as it could bloat // the code exponentially. There is way to better handle this, e.g. @@ -783,16 +797,16 @@ inlineCallInstruction(DI)) LocalChanged = true; } else { - FS->findImportedFunctions(ImportGUIDs, F.getParent(), - Samples->getTotalSamples() * - SampleProfileHotThreshold / 100); + DEBUG(dbgs() + << "\nFailed to promote indirect call to " + << CalleeFunctionName << " because " << Reason << "\n"); } } } else if (CalledFunction && CalledFunction->getSubprogram() && !CalledFunction->isDeclaration()) { if (inlineCallInstruction(I)) LocalChanged = true; - } else { + } else if (IsThinLTOPreLink) { findCalleeFunctionSamples(*I)->findImportedFunctions( ImportGUIDs, F.getParent(), Samples->getTotalSamples() * SampleProfileHotThreshold / 100); @@ -1558,9 +1572,9 @@ return FAM.getResult(F); }; - SampleProfileLoader SampleLoader(ProfileFileName.empty() ? SampleProfileFile - : ProfileFileName, - GetAssumptionCache, GetTTI); + SampleProfileLoader SampleLoader( + ProfileFileName.empty() ? SampleProfileFile : ProfileFileName, + IsThinLTOPreLink, GetAssumptionCache, GetTTI); SampleLoader.doInitialization(M); Index: test/Transforms/SampleProfile/Inputs/import.prof =================================================================== --- test/Transforms/SampleProfile/Inputs/import.prof +++ test/Transforms/SampleProfile/Inputs/import.prof @@ -5,4 +5,4 @@ 4: foo1:1000 1: 1000 4: foo2:1000 - 1: 1000 + 1: 1000 foo3:1000 Index: test/Transforms/SampleProfile/import.ll =================================================================== --- test/Transforms/SampleProfile/import.ll +++ test/Transforms/SampleProfile/import.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/import.prof -S | FileCheck %s +; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=new-pm-pgo-sample-use-pipeline -profile-file=%S/Inputs/import.prof -S | FileCheck %s ; Tests whether the functions in the inline stack are added to the ; function_entry_count metadata. @@ -15,9 +15,9 @@ ret void } -; GUIDs of foo, bar, foo1 and foo2 should be included in the metadata to make -; sure hot inline stacks are imported. -; CHECK: !{!"function_entry_count", i64 1, i64 2494702099028631698, i64 6699318081062747564, i64 7682762345278052905, i64 -2012135647395072713} +; GUIDs of foo, bar, foo1, foo2 and foo3 should be included in the metadata to +; make sure hot inline stacks are imported. +; CHECK: !{!"function_entry_count", i64 1, i64 2494702099028631698, i64 6699318081062747564, i64 7682762345278052905, i64 -7908226060800700466, i64 -2012135647395072713} !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!8, !9}