diff --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h --- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h +++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h @@ -56,7 +56,7 @@ std::queue Queue; for (auto &Child : ContextTracker.getRootContext().getAllChildContext()) { ContextTrieNode *Callee = &Child.second; - addProfiledFunction(Callee->getFuncName()); + addProfiledFunction(ContextTracker.getFuncNameFor(Callee)); Queue.push(Callee); } @@ -72,9 +72,10 @@ // context-based one, which may in turn block context-based inlining. for (auto &Child : Caller->getAllChildContext()) { ContextTrieNode *Callee = &Child.second; - addProfiledFunction(Callee->getFuncName()); + addProfiledFunction(ContextTracker.getFuncNameFor(Callee)); Queue.push(Callee); - addProfiledCall(Caller->getFuncName(), Callee->getFuncName()); + addProfiledCall(ContextTracker.getFuncNameFor(Caller), + ContextTracker.getFuncNameFor(Callee)); } } } diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h --- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h +++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h @@ -110,7 +110,8 @@ // deterministically. using ContextSamplesTy = std::set; - SampleContextTracker(SampleProfileMap &Profiles); + SampleContextTracker(SampleProfileMap &Profiles, + const DenseMap *GUIDToFuncNameMap); // Query context profile for a specific callee with given name at a given // call-site. The full context is identified by location of call instruction. FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst, @@ -134,6 +135,8 @@ FunctionSamples *getBaseSamplesFor(StringRef Name, bool MergeContext = true); // Retrieve the context trie node for given profile context ContextTrieNode *getContextFor(const SampleContext &Context); + // Get real function name for a given trie node. 
+ StringRef getFuncNameFor(ContextTrieNode *Node) const; // Mark a context profile as inlined when function is inlined. // This makes sure that inlined context profile will be excluded in // function's base profile. @@ -163,6 +166,9 @@ // Map from function name to context profiles (excluding base profile) StringMap FuncToCtxtProfiles; + // Map from function guid to real function names. Only used in md5 mode. + const DenseMap *GUIDToFuncNameMap; + // Root node for context trie tree ContextTrieNode RootContext; }; diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -759,18 +759,7 @@ } } - if (useMD5()) { - for (auto Name : FuncsToUse) { - auto GUID = std::to_string(MD5Hash(Name)); - auto iter = FuncOffsetTable.find(StringRef(GUID)); - if (iter == FuncOffsetTable.end()) - continue; - const uint8_t *FuncProfileAddr = Start + iter->second; - assert(FuncProfileAddr < End && "out of LBRProfile section"); - if (std::error_code EC = readFuncProfile(FuncProfileAddr)) - return EC; - } - } else if (ProfileIsCS) { + if (ProfileIsCS) { // Compute the ordered set of names, so we can // get all context profiles under a subtree by // iterating through the ordered names. @@ -779,13 +768,20 @@ OrderedContexts.insert(Name.first); } + DenseSet FuncGuidsToUse; + if (useMD5()) { + for (auto Name : FuncsToUse) + FuncGuidsToUse.insert(Function::getGUID(Name)); + } + // For each function in current module, load all // context profiles for the function. 
for (auto NameOffset : FuncOffsetTable) { SampleContext FContext = NameOffset.first; auto FuncName = FContext.getName(); - if (!FuncsToUse.count(FuncName) && - (!Remapper || !Remapper->exist(FuncName))) + if ((useMD5() && !FuncGuidsToUse.count(std::stoull(FuncName.str()))) || + (!useMD5() && !FuncsToUse.count(FuncName) && + (!Remapper || !Remapper->exist(FuncName)))) continue; // For each context profile we need, try to load @@ -803,16 +799,29 @@ } } } else { - for (auto NameOffset : FuncOffsetTable) { - SampleContext FContext(NameOffset.first); - auto FuncName = FContext.getName(); - if (!FuncsToUse.count(FuncName) && - (!Remapper || !Remapper->exist(FuncName))) - continue; - const uint8_t *FuncProfileAddr = Start + NameOffset.second; - assert(FuncProfileAddr < End && "out of LBRProfile section"); - if (std::error_code EC = readFuncProfile(FuncProfileAddr)) - return EC; + if (useMD5()) { + for (auto Name : FuncsToUse) { + auto GUID = std::to_string(MD5Hash(Name)); + auto iter = FuncOffsetTable.find(StringRef(GUID)); + if (iter == FuncOffsetTable.end()) + continue; + const uint8_t *FuncProfileAddr = Start + iter->second; + assert(FuncProfileAddr < End && "out of LBRProfile section"); + if (std::error_code EC = readFuncProfile(FuncProfileAddr)) + return EC; + } + } else { + for (auto NameOffset : FuncOffsetTable) { + SampleContext FContext(NameOffset.first); + auto FuncName = FContext.getName(); + if (!FuncsToUse.count(FuncName) && + (!Remapper || !Remapper->exist(FuncName))) + continue; + const uint8_t *FuncProfileAddr = Start + NameOffset.second; + assert(FuncProfileAddr < End && "out of LBRProfile section"); + if (std::error_code EC = readFuncProfile(FuncProfileAddr)) + return EC; + } } } Data = End; diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp --- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -203,7 +203,10 @@ } // Profiler tracker
than manages profiles and its associated context -SampleContextTracker::SampleContextTracker(SampleProfileMap &Profiles) { +SampleContextTracker::SampleContextTracker( + SampleProfileMap &Profiles, + const DenseMap *GUIDToFuncNameMap) + : GUIDToFuncNameMap(GUIDToFuncNameMap) { for (auto &FuncSample : Profiles) { FunctionSamples *FSamples = &FuncSample.second; SampleContext Context = FuncSample.first; @@ -227,6 +230,10 @@ return nullptr; CalleeName = FunctionSamples::getCanonicalFnName(CalleeName); + // Convert real function names to MD5 names, if the input profile is + // MD5-based. + std::string FGUID; + CalleeName = getRepInFormat(CalleeName, FunctionSamples::UseMD5, FGUID); // For indirect call, CalleeName will be empty, in which case the context // profile for callee with largest total samples will be returned. @@ -313,6 +320,11 @@ FunctionSamples *SampleContextTracker::getBaseSamplesFor(StringRef Name, bool MergeContext) { LLVM_DEBUG(dbgs() << "Getting base profile for function: " << Name << "\n"); + // Convert real function names to MD5 names, if the input profile is + // MD5-based. + std::string FGUID; + Name = getRepInFormat(Name, FunctionSamples::UseMD5, FGUID); + // Base profile is top-level node (child of root node), so try to retrieve // existing top-level node for given function first. 
If it exists, it could be // that we've merged base profile before, or there's actually context-less @@ -416,6 +428,13 @@ void SampleContextTracker::dump() { RootContext.dumpTree(); } +StringRef SampleContextTracker::getFuncNameFor(ContextTrieNode *Node) const { + if (!FunctionSamples::UseMD5) + return Node->getFuncName(); + assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be populated first"); + return GUIDToFuncNameMap->lookup(std::stoull(Node->getFuncName().str())); +} + ContextTrieNode * SampleContextTracker::getContextFor(const SampleContext &Context) { return getOrCreateContextPath(Context, false); @@ -458,6 +477,20 @@ RootName = PrevDIL->getScope()->getSubprogram()->getName(); S.push_back(std::make_pair(LineLocation(0, 0), RootName)); + // Convert real function names to MD5 names, if the input profile is + // MD5-based. + std::vector MD5Names; + if (FunctionSamples::UseMD5) { + // Reserve so emplace_back never reallocates: Location.second refers into + // these strings, and moving a small-buffer string would leave it dangling. + MD5Names.reserve(S.size()); + for (auto &Location : S) { + MD5Names.emplace_back(); + getRepInFormat(Location.second, FunctionSamples::UseMD5, MD5Names.back()); + Location.second = MD5Names.back(); + } + } + ContextTrieNode *ContextNode = &RootContext; int I = S.size(); while (--I >= 0 && ContextNode) { diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -984,7 +984,7 @@ Function *Func = SymbolMap.lookup(Name); // Add to the import list only when it's defined out of module. if (!Func || Func->isDeclaration()) - InlinedGUIDs.insert(FunctionSamples::getGUID(Name)); + InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeSample->getName())); // Import hot CallTargets, which may not be available in IR because full // profile annotation cannot be done until backend compilation in ThinLTO.
@@ -994,7 +994,7 @@ StringRef CalleeName = CalleeSample->getFuncName(TS.getKey()); const Function *Callee = SymbolMap.lookup(CalleeName); if (!Callee || Callee->isDeclaration()) - InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeName)); + InlinedGUIDs.insert(FunctionSamples::getGUID(TS.getKey())); } // Import hot child context profile associted with callees. Note that this @@ -1832,8 +1832,8 @@ UseIterativeBFIInference = true; // Tracker for profiles under different context - ContextTracker = - std::make_unique(Reader->getProfiles()); + ContextTracker = std::make_unique( + Reader->getProfiles(), &GUIDToFuncNameMap); } // Load pseudo probe descriptors for probe-based function samples. diff --git a/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll b/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll @@ -2,6 +2,8 @@ ; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list.prof -S | FileCheck %s ; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/csspgo-import-list.prof -o %t.prof ; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%t.prof -S | FileCheck %s +; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/csspgo-import-list.prof -o %t.md5 +; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%t.md5 -S | FileCheck %s declare i32 @_Z5funcBi(i32 %x) declare i32 @_Z5funcAi(i32 %x) diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll @@ -2,6 +2,9 @@ ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof 
-sample-profile-icp-relative-hotness=1 -pass-remarks=sample-profile -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-ALL %s ; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-relative-hotness=1 -pass-remarks=sample-profile -sample-profile-inline-size=0 -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-HOT %s ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-relative-hotness=1 -pass-remarks=sample-profile -sample-profile-inline-size=0 -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-HOT %s +; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/indirect-call-csspgo.prof -o %t.md5 +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.md5 -sample-profile-icp-relative-hotness=1 -pass-remarks=sample-profile -sample-profile-inline-size=0 -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-HOT %s + define void @test(void ()*) #0 !dbg !3 { ;; Add two direct call to force top-down order for sample profile loader diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll @@ -6,7 +6,10 @@ ; main:3 @ _Z5funcAi:1 @ _Z8funcLeafi ; _Z5funcBi:1 @ _Z8funcLeafi ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE -; + +; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/profile-context-tracker.prof -o %t.md5 +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.md5 -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s 
--check-prefix=INLINE-BASE + ; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, so we get less inlining for given profile ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW ; diff --git a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test --- a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test +++ b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test @@ -2,8 +2,13 @@ ; REQUIRES: x86_64-linux ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 | FileCheck %s --check-prefix=CHECK-UNWINDER ; RUN: FileCheck %s --input-file %t -; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.aggperfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 | FileCheck %s --check-prefix=CHECK-AGG-UNWINDER -; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-AGG +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.aggperfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t1 --show-unwinder-output --profile-summary-cold-count=0 | FileCheck %s --check-prefix=CHECK-AGG-UNWINDER +; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-AGG +; RUN: llvm-profgen --format=extbinary --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t2 --profile-summary-cold-count=0 -use-md5 +; RUN: llvm-profdata show --sample -show-sec-info-only %t2 | FileCheck %s --check-prefix=CHECK-MD5 +; RUN: llvm-profdata show --sample -detailed-summary --function=dummy %t > %t.summary 
+; RUN: llvm-profdata show --sample -detailed-summary --function=dummy %t2 > %t2.summary +; RUN: diff -b %t.summary %t2.summary ; CHECK-AGG:[main:1 @ foo]:108:0 ; CHECK-AGG: 2: 6 @@ -69,6 +74,7 @@ ; CHECK-UNWINDER: (5d7, 5e5): 2 ; CHECK-UNWINDER: (5e9, 634): 3 +; CHECK-MD5: NameTableSection {{.*}} {fixlenmd5} diff --git a/llvm/tools/llvm-profgen/CSPreInliner.cpp b/llvm/tools/llvm-profgen/CSPreInliner.cpp --- a/llvm/tools/llvm-profgen/CSPreInliner.cpp +++ b/llvm/tools/llvm-profgen/CSPreInliner.cpp @@ -42,9 +42,12 @@ CSPreInliner::CSPreInliner(SampleProfileMap &Profiles, ProfiledBinary &Binary, uint64_t HotThreshold, uint64_t ColdThreshold) - : UseContextCost(UseContextCostForPreInliner), ContextTracker(Profiles), - ProfileMap(Profiles), Binary(Binary), HotCountThreshold(HotThreshold), - ColdCountThreshold(ColdThreshold) {} + : UseContextCost(UseContextCostForPreInliner), + // TODO: Pass in a guid-to-name map in order for + // ContextTracker.getFuncNameFor to work, if `Profiles` can have md5 codes + // as their profile context. 
+ ContextTracker(Profiles, nullptr), ProfileMap(Profiles), Binary(Binary), + HotCountThreshold(HotThreshold), ColdCountThreshold(ColdThreshold) {} std::vector CSPreInliner::buildTopDownOrder() { std::vector Order; diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -27,6 +27,11 @@ clEnumValN(SPF_GCC, "gcc", "GCC encoding (only meaningful for -sample)"))); +cl::opt UseMD5( + "use-md5", cl::init(false), cl::Hidden, + cl::desc("Use md5 to represent function names in the output profile (only " + "meaningful for -extbinary)")); + static cl::opt RecursionCompression( "compress-recursion", cl::desc("Compressing recursion by deduplicating adjacent frame " @@ -99,6 +104,15 @@ auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat); if (std::error_code EC = WriterOrErr.getError()) exitWithError(EC, OutputFilename); + + if (UseMD5) { + if (OutputFormat != SPF_Ext_Binary) + WithColor::warning() << "-use-md5 is ignored. Specify " + "--format=extbinary to enable it\n"; + else + WriterOrErr.get()->setUseMD5(); + } + write(std::move(WriterOrErr.get()), ProfileMap); }