diff --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h --- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h +++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h @@ -56,7 +56,7 @@ std::queue Queue; for (auto &Child : ContextTracker.getRootContext().getAllChildContext()) { ContextTrieNode *Callee = &Child.second; - addProfiledFunction(Callee->getFuncName()); + addProfiledFunction(ContextTracker.getFuncNameFor(Callee)); Queue.push(Callee); } @@ -72,9 +72,10 @@ // context-based one, which may in turn block context-based inlining. for (auto &Child : Caller->getAllChildContext()) { ContextTrieNode *Callee = &Child.second; - addProfiledFunction(Callee->getFuncName()); + addProfiledFunction(ContextTracker.getFuncNameFor(Callee)); Queue.push(Callee); - addProfiledCall(Caller->getFuncName(), Callee->getFuncName()); + addProfiledCall(ContextTracker.getFuncNameFor(Caller), + ContextTracker.getFuncNameFor(Callee)); } } } diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h --- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h +++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h @@ -104,7 +104,8 @@ // deterministically. using ContextSamplesTy = std::set; - SampleContextTracker(SampleProfileMap &Profiles); + SampleContextTracker(SampleProfileMap &Profiles, + const DenseMap *GUIDToFuncNameMap); // Query context profile for a specific callee with given name at a given // call-site. The full context is identified by location of call instruction. FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst, @@ -128,6 +129,8 @@ FunctionSamples *getBaseSamplesFor(StringRef Name, bool MergeContext = true); // Retrieve the context trie node for given profile context ContextTrieNode *getContextFor(const SampleContext &Context); + // Get real function name for a given trie node. 
+ StringRef getFuncNameFor(ContextTrieNode *Node) const; // Mark a context profile as inlined when function is inlined. // This makes sure that inlined context profile will be excluded in // function's base profile. @@ -156,6 +159,9 @@ // Map from function name to context profiles (excluding base profile) StringMap FuncToCtxtProfiles; + // Map from function guid to real function names. Only used in md5 mode. + const DenseMap *GUIDToFuncNameMap; + // Root node for context trie tree ContextTrieNode RootContext; }; diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -810,19 +810,7 @@ } } - if (useMD5()) { - for (auto Name : FuncsToUse) { - auto GUID = std::to_string(MD5Hash(Name)); - auto iter = FuncOffsetTable.find(StringRef(GUID)); - if (iter == FuncOffsetTable.end()) - continue; - const uint8_t *FuncProfileAddr = Start + iter->second; - assert(FuncProfileAddr < End && "out of LBRProfile section"); - if (std::error_code EC = - readFuncProfile(FuncProfileAddr, FunctionSamples::ProfileIsCS)) - return EC; - } - } else if (FunctionSamples::ProfileIsCS) { + if (FunctionSamples::ProfileIsCS) { // Compute the ordered set of names, so we can // get all context profiles under a subtree by // iterating through the ordered names. @@ -831,14 +819,25 @@ OrderedNames.insert(Name.first); } + DenseSet FuncGuidsToUse; + if (useMD5()) { + for (auto Name : FuncsToUse) + FuncGuidsToUse.insert(Function::getGUID(Name)); + } + // For each function in current module, load all // context profiles for the function. 
for (auto NameOffset : FuncOffsetTable) { SampleContext FContext = NameOffset.first; auto FuncName = FContext.getNameWithoutContext(); - if (!FuncsToUse.count(FuncName) && - (!Remapper || !Remapper->exist(FuncName))) - continue; + if (useMD5()) { + if (!FuncGuidsToUse.count(FunctionSamples::getGUID(FuncName))) + continue; + } else { + if (!FuncsToUse.count(FuncName) && + (!Remapper || !Remapper->exist(FuncName))) + continue; + } // For each context profile we need, try to load // all context profile in the subtree. This can @@ -847,8 +846,7 @@ while (It != OrderedNames.end() && FContext.IsPrefixOf(*It)) { const uint8_t *FuncProfileAddr = Start + FuncOffsetTable[*It]; assert(FuncProfileAddr < End && "out of LBRProfile section"); - if (std::error_code EC = readFuncProfile( - FuncProfileAddr, FunctionSamples::ProfileIsCS)) + if (std::error_code EC = readFuncProfile(FuncProfileAddr, true)) return EC; // Remove loaded context profile so we won't // load it repeatedly. @@ -856,17 +854,29 @@ } } } else { - for (auto NameOffset : FuncOffsetTable) { - SampleContext FContext(NameOffset.first); - auto FuncName = FContext.getNameWithoutContext(); - if (!FuncsToUse.count(FuncName) && - (!Remapper || !Remapper->exist(FuncName))) - continue; - const uint8_t *FuncProfileAddr = Start + NameOffset.second; - assert(FuncProfileAddr < End && "out of LBRProfile section"); - if (std::error_code EC = - readFuncProfile(FuncProfileAddr, FunctionSamples::ProfileIsCS)) - return EC; + if (useMD5()) { + for (auto Name : FuncsToUse) { + auto GUID = std::to_string(MD5Hash(Name)); + auto iter = FuncOffsetTable.find(StringRef(GUID)); + if (iter == FuncOffsetTable.end()) + continue; + const uint8_t *FuncProfileAddr = Start + iter->second; + assert(FuncProfileAddr < End && "out of LBRProfile section"); + if (std::error_code EC = readFuncProfile(FuncProfileAddr, false)) + return EC; + } + } else { + for (auto NameOffset : FuncOffsetTable) { + SampleContext FContext(NameOffset.first); + auto FuncName = 
FContext.getNameWithoutContext(); + if (!FuncsToUse.count(FuncName) && + (!Remapper || !Remapper->exist(FuncName))) + continue; + const uint8_t *FuncProfileAddr = Start + NameOffset.second; + assert(FuncProfileAddr < End && "out of LBRProfile section"); + if (std::error_code EC = readFuncProfile(FuncProfileAddr, false)) + return EC; + } } } Data = End; @@ -1036,6 +1046,7 @@ auto Size = readNumber(); if (std::error_code EC = Size.getError()) return EC; + FunctionSamples::UseMD5 = true; MD5StringBuf = std::make_unique>(); MD5StringBuf->reserve(*Size); if (FixedLengthMD5) { diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp --- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -176,7 +176,10 @@ } // Profiler tracker than manages profiles and its associated context -SampleContextTracker::SampleContextTracker(SampleProfileMap &Profiles) { +SampleContextTracker::SampleContextTracker( + SampleProfileMap &Profiles, + const DenseMap *GUIDToFuncNameMap) + : GUIDToFuncNameMap(GUIDToFuncNameMap) { for (auto &FuncSample : Profiles) { FunctionSamples *FSamples = &FuncSample.second; SampleContext Context = FuncSample.first; @@ -200,6 +203,10 @@ return nullptr; CalleeName = FunctionSamples::getCanonicalFnName(CalleeName); + // Convert real function names to MD5 names, if the input profile is + // MD5-based. + std::string FGUID; + CalleeName = getRepInFormat(CalleeName, FunctionSamples::UseMD5, FGUID); // For indirect call, CalleeName will be empty, in which case the context // profile for callee with largest total samples will be returned. @@ -286,6 +293,11 @@ FunctionSamples *SampleContextTracker::getBaseSamplesFor(StringRef Name, bool MergeContext) { LLVM_DEBUG(dbgs() << "Getting base profile for function: " << Name << "\n"); + // Convert real function names to MD5 names, if the input profile is + // MD5-based. 
+ std::string FGUID; + Name = getRepInFormat(Name, FunctionSamples::UseMD5, FGUID); + // Base profile is top-level node (child of root node), so try to retrieve // existing top-level node for given function first. If it exists, it could be // that we've merged base profile before, or there's actually context-less @@ -404,6 +416,15 @@ } } +StringRef SampleContextTracker::getFuncNameFor(ContextTrieNode *Node) const { + if (!FunctionSamples::UseMD5) + return Node->getFuncName(); + assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be populated first"); + // In MD5 mode the node name is the decimal GUID. StringRef::data() is not + // guaranteed to be null-terminated, so parse an owned std::string copy. + return GUIDToFuncNameMap->lookup(std::stoull(Node->getFuncName().str())); +} + ContextTrieNode * SampleContextTracker::getContextFor(const SampleContext &Context) { return getOrCreateContextPath(Context, false); @@ -446,6 +467,21 @@ RootName = PrevDIL->getScope()->getSubprogram()->getName(); S.push_back(std::make_pair(LineLocation(0, 0), RootName)); + // Convert real function names to MD5 names, if the input profile is + // MD5-based. + std::vector<std::string> MD5Names; + if (FunctionSamples::UseMD5) { + // Location.second refers into MD5Names, so reserve up front: a + // reallocating push_back would leave earlier entries dangling. + MD5Names.reserve(S.size()); + for (auto &Location : S) { + std::string FGUID; + getRepInFormat(Location.second, FunctionSamples::UseMD5, FGUID); + MD5Names.push_back(FGUID); + Location.second = MD5Names.back(); + } + } + ContextTrieNode *ContextNode = &RootContext; int I = S.size(); while (--I >= 0 && ContextNode) { diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -982,7 +982,7 @@ Function *Func = SymbolMap.lookup(Name); // Add to the import list only when it's defined out of module. if (!Func || Func->isDeclaration()) - InlinedGUIDs.insert(FunctionSamples::getGUID(Name)); + InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeSample->getName())); // Import hot CallTargets, which may not be available in IR because full // profile annotation cannot be done until backend compilation in ThinLTO. 
@@ -992,7 +992,7 @@ StringRef CalleeName = CalleeSample->getFuncName(TS.getKey()); const Function *Callee = SymbolMap.lookup(CalleeName); if (!Callee || Callee->isDeclaration()) - InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeName)); + InlinedGUIDs.insert(FunctionSamples::getGUID(TS.getKey())); } // Import hot child context profile associted with callees. Note that this @@ -1815,8 +1815,8 @@ UseIterativeBFIInference = true; // Tracker for profiles under different context - ContextTracker = - std::make_unique(Reader->getProfiles()); + ContextTracker = std::make_unique( + Reader->getProfiles(), &GUIDToFuncNameMap); } // Load pseudo probe descriptors for probe-based function samples. diff --git a/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll b/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll @@ -2,6 +2,8 @@ ; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list.prof -S | FileCheck %s ; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/csspgo-import-list.prof -o %t.prof ; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%t.prof -S | FileCheck %s +; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/csspgo-import-list.prof -o %t.md5 +; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%t.md5 -S | FileCheck %s declare i32 @_Z5funcBi(i32 %x) declare i32 @_Z5funcAi(i32 %x) diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll @@ -2,6 +2,9 @@ ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof 
-sample-profile-icp-relative-hotness=1 -pass-remarks=sample-profile -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-ALL %s ; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-relative-hotness=1 -pass-remarks=sample-profile -sample-profile-inline-size=0 -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-HOT %s ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-relative-hotness=1 -pass-remarks=sample-profile -sample-profile-inline-size=0 -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-HOT %s +; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/indirect-call-csspgo.prof -o %t.md5 +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.md5 -sample-profile-icp-relative-hotness=1 -pass-remarks=sample-profile -sample-profile-inline-size=0 -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-HOT %s + define void @test(void ()*) #0 !dbg !3 { ;; Add two direct call to force top-down order for sample profile loader diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll @@ -6,7 +6,10 @@ ; main:3 @ _Z5funcAi:1 @ _Z8funcLeafi ; _Z5funcBi:1 @ _Z8funcLeafi ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE -; + +; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/profile-context-tracker.prof -o %t.md5 +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.md5 -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s 
--check-prefix=INLINE-BASE + ; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, so we get less inlining for given profile ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW ; diff --git a/llvm/tools/llvm-profgen/CSPreInliner.cpp b/llvm/tools/llvm-profgen/CSPreInliner.cpp --- a/llvm/tools/llvm-profgen/CSPreInliner.cpp +++ b/llvm/tools/llvm-profgen/CSPreInliner.cpp @@ -32,7 +32,7 @@ CSPreInliner::CSPreInliner(SampleProfileMap &Profiles, uint64_t HotThreshold, uint64_t ColdThreshold) - : ContextTracker(Profiles), ProfileMap(Profiles), + : ContextTracker(Profiles, nullptr), ProfileMap(Profiles), HotCountThreshold(HotThreshold), ColdCountThreshold(ColdThreshold) {} std::vector CSPreInliner::buildTopDownOrder() {