diff --git a/llvm/docs/CommandGuide/llvm-profdata.rst b/llvm/docs/CommandGuide/llvm-profdata.rst --- a/llvm/docs/CommandGuide/llvm-profdata.rst +++ b/llvm/docs/CommandGuide/llvm-profdata.rst @@ -153,7 +153,8 @@ .. option:: -use-md5=[true|false] Use MD5 to represent string in name table when writing the profile. - This option can only be used with sample-based profile in extbinary format. + This option can only be used with sample-based profile in extbinary format + for non-cs profile. .. option:: -gen-partial-profile=[true|false] diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -482,6 +482,8 @@ StringRef getNameWithoutContext() const { return Name; } StringRef getCallingContext() const { return CallingContext; } StringRef getNameWithContext() const { return FullContext; } + uint64_t getGUID() const; + std::string getContextInMd5() const; private: // Give a context string, decode and populate internal states like @@ -846,11 +848,10 @@ /// by looking up in the function map GUIDToFuncNameMap. /// If the original name doesn't exist in the map, return empty StringRef. StringRef getFuncName(StringRef Name) const { - if (!UseMD5) + if (!isGUID(Name)) return Name; - assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be populated first"); - return GUIDToFuncNameMap->lookup(std::stoull(Name.data())); + return GUIDToFuncNameMap->lookup(FunctionSamples::getGUID(Name)); } /// Returns the line offset to the start line of the subprogram. @@ -905,7 +906,17 @@ // If UseMD5 is true, the name is already a GUID and we // don't want to return the GUID of GUID. static uint64_t getGUID(StringRef Name) { - return UseMD5 ? std::stoull(Name.data()) : Function::getGUID(Name); + uint64_t GUID; + if (!Name.getAsInteger(0, GUID)) + return GUID; + if (!Name.getAsInteger(10, GUID)) + return GUID; + return Function::getGUID(Name); + } + + static bool isGUID(StringRef Name) { + uint64_t GUID; + return !Name.getAsInteger(0, GUID) || !Name.getAsInteger(10, GUID); } // Find all the names in the current FunctionSamples including names in diff --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h --- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h +++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h @@ -56,7 +56,7 @@ std::queue Queue; for (auto &Child : ContextTracker.getRootContext().getAllChildContext()) { ContextTrieNode *Callee = &Child.second; - addProfiledFunction(Callee->getFuncName()); + addProfiledFunction(ContextTracker.getFuncNameFor(Callee)); Queue.push(Callee); } @@ -72,9 +72,10 @@ // context-based one, which may in turn block context-based inlining. for (auto &Child : Caller->getAllChildContext()) { ContextTrieNode *Callee = &Child.second; - addProfiledFunction(Callee->getFuncName()); + addProfiledFunction(ContextTracker.getFuncNameFor(Callee)); Queue.push(Callee); - addProfiledCall(Caller->getFuncName(), Callee->getFuncName()); + addProfiledCall(ContextTracker.getFuncNameFor(Caller), + ContextTracker.getFuncNameFor(Callee)); } } } diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h --- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h +++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h @@ -23,56 +23,58 @@ #include "llvm/ProfileData/SampleProf.h" #include #include +#include +#include #include using namespace llvm; using namespace sampleprof; namespace llvm { - +class SampleContextTracker; // Internal trie tree representation used for tracking context tree and sample // profiles. The path from root node to a given node represents the context of // that nodes' profile. class ContextTrieNode { public: - ContextTrieNode(ContextTrieNode *Parent = nullptr, - StringRef FName = StringRef(), + ContextTrieNode(ContextTrieNode *Parent = nullptr, uint64_t GUID = 0, FunctionSamples *FSamples = nullptr, LineLocation CallLoc = {0, 0}) - : ParentContext(Parent), FuncName(FName), FuncSamples(FSamples), + : ParentContext(Parent), GUID(GUID), FuncSamples(FSamples), CallSiteLoc(CallLoc){}; ContextTrieNode *getChildContext(const LineLocation &CallSite, - StringRef CalleeName); + uint64_t CalleeGUID); ContextTrieNode *getHottestChildContext(const LineLocation &CallSite); ContextTrieNode *getOrCreateChildContext(const LineLocation &CallSite, - StringRef CalleeName, + uint64_t CalleeGUID, bool AllowCreate = true); ContextTrieNode &moveToChildContext(const LineLocation &CallSite, ContextTrieNode &&NodeToMove, StringRef ContextStrToRemove, + SampleContextTracker &ContextTracker, bool DeleteNode = true); - void removeChildContext(const LineLocation &CallSite, StringRef CalleeName); - std::map &getAllChildContext(); - StringRef getFuncName() const; + void removeChildContext(const LineLocation &CallSite, uint64_t CalleeGUID); + std::map &getAllChildContext(); + uint64_t getFuncGUID() const; FunctionSamples *getFunctionSamples() const; void setFunctionSamples(FunctionSamples *FSamples); LineLocation getCallSiteLoc() const; ContextTrieNode *getParentContext() const; void setParentContext(ContextTrieNode *Parent); - void dump(); + void dump(SampleContextTracker &ContextTracker); private: - static uint32_t nodeHash(StringRef ChildName, const LineLocation &Callsite); + static uint64_t nodeHash(uint64_t ChildGUID, const LineLocation &Callsite); // Map line+discriminator location to child context - std::map AllChildContext; + std::map AllChildContext; // Link to parent context node ContextTrieNode *ParentContext; - // Function name for current context - StringRef FuncName; + // Function GUID for current context + uint64_t GUID; // Function Samples for current context FunctionSamples *FuncSamples; @@ -90,13 +92,14 @@ // calling context and the context is identified by path from root to the node. class SampleContextTracker { public: - using ContextSamplesTy = SmallVector; + using ContextSamplesTy = std::map; - SampleContextTracker(StringMap &Profiles); + SampleContextTracker(StringMap &Profiles, + DenseMap &GUIDToFuncNameMap); // Query context profile for a specific callee with given name at a given // call-site. The full context is identified by location of call instruction. FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst, - StringRef CalleeName); + uint64_t CalleeGUID); // Get samples for indirect call targets for call site at given location. std::vector getIndirectCalleeContextSamplesFor(const DILocation *DIL); @@ -107,13 +110,13 @@ FunctionSamples *getContextSamplesFor(const SampleContext &Context); // Get all context profile for given function. ContextSamplesTy &getAllContextSamplesFor(const Function &Func); - ContextSamplesTy &getAllContextSamplesFor(StringRef Name); + ContextSamplesTy &getAllContextSamplesFor(uint64_t GUID); // Query base profile for a given function. A base profile is a merged view // of all context profiles for contexts that are not inlined. FunctionSamples *getBaseSamplesFor(const Function &Func, bool MergeContext = true); - // Query base profile for a given function by name. - FunctionSamples *getBaseSamplesFor(StringRef Name, bool MergeContext = true); + // Query base profile for a given function by GUID. + FunctionSamples *getBaseSamplesFor(uint64_t GUID, bool MergeContext = true); // Retrieve the context trie node for given profile context ContextTrieNode *getContextFor(const SampleContext &Context); // Mark a context profile as inlined when function is inlined. @@ -122,18 +125,20 @@ void markContextSamplesInlined(const FunctionSamples *InlinedSamples); ContextTrieNode &getRootContext(); void promoteMergeContextSamplesTree(const Instruction &Inst, - StringRef CalleeName); + uint64_t CalleeGUID); + StringRef getFuncNameFor(ContextTrieNode *Node) const; + std::string getContextInRealName(SampleContext &Context); // Dump the internal context profile trie. void dump(); private: ContextTrieNode *getContextFor(const DILocation *DIL); ContextTrieNode *getCalleeContextFor(const DILocation *DIL, - StringRef CalleeName); + uint64_t CalleeGUID); ContextTrieNode *getOrCreateContextPath(const SampleContext &Context, bool AllowCreate); - ContextTrieNode *getTopLevelContextNode(StringRef FName); - ContextTrieNode &addTopLevelContextNode(StringRef FName); + ContextTrieNode *getTopLevelContextNode(uint64_t GUID); + ContextTrieNode &addTopLevelContextNode(uint64_t GUID); ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &NodeToPromo); void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode, StringRef ContextStrToRemove); @@ -142,7 +147,9 @@ StringRef ContextStrToRemove); // Map from function name to context profiles (excluding base profile) - StringMap FuncToCtxtProfiles; + std::unordered_map FuncToCtxtProfiles; + + DenseMap &GUIDToFuncNameMap; // Root node for context trie tree ContextTrieNode RootContext; diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -151,6 +151,34 @@ return OS; } +std::string SampleContext::getContextInMd5() const { + std::string Md5Context; + StringRef ContextRemain = FullContext; + StringRef ChildContext; + StringRef CalleeName; + while (!ContextRemain.empty()) { + auto ContextSplit = SampleContext::splitContextString(ContextRemain); + ChildContext = ContextSplit.first; + ContextRemain = ContextSplit.second; + LineLocation NextCallSiteLoc(0, 0); + SampleContext::decodeContextString(ChildContext, CalleeName, + NextCallSiteLoc); + auto CalleeGUID = FunctionSamples::getGUID(CalleeName); + if (!Md5Context.empty()) + Md5Context += " @ "; + Md5Context += "0x" + Twine::utohexstr(CalleeGUID).str(); + if (NextCallSiteLoc.LineOffset) + Md5Context += ":" + Twine(NextCallSiteLoc.LineOffset).str(); + if (NextCallSiteLoc.Discriminator) + Md5Context += "." + Twine(NextCallSiteLoc.Discriminator).str(); + } + return Md5Context; +} + +uint64_t SampleContext::getGUID() const { + return FunctionSamples::getGUID(Name); +} + /// Print the samples collected for a function on stream \p OS. void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const { if (getFunctionHash()) diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -747,14 +747,16 @@ OrderedNames.insert(Name.first); } + DenseSet FuncGuidsToUse; + for (auto Name : FuncsToUse) + FuncGuidsToUse.insert(Function::getGUID(Name)); + // For each function in current module, load all // context profiles for the function. for (auto NameOffset : FuncOffsetTable) { StringRef ContextName = NameOffset.first; SampleContext FContext(ContextName); - auto FuncName = FContext.getNameWithoutContext(); - if (!FuncsToUse.count(FuncName) && - (!Remapper || !Remapper->exist(FuncName))) + if (!FuncGuidsToUse.count(FContext.getGUID())) continue; // For each context profile we need, try to load diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -190,7 +190,8 @@ } std::error_code SampleProfileWriterExtBinaryBase::writeNameTable() { - if (!UseMD5) + // The name table of MD5-based CS profile is handled same way with non-MD5. + if (!UseMD5 || FunctionSamples::ProfileIsCS) return SampleProfileWriterBinary::writeNameTable(); auto &OS = *OutputStream; @@ -216,13 +217,19 @@ addNames(I.second); } - // If NameTable contains ".__uniq." suffix, set SecFlagUniqSuffix flag - // so compiler won't strip the suffix during profile matching after - // seeing the flag in the profile. - for (const auto &I : NameTable) { - if (I.first.find(FunctionSamples::UniqSuffix) != StringRef::npos) { - addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagUniqSuffix); - break; + if (FunctionSamples::ProfileIsCS) { + // Since there's no way to tell if md5 names have the ".__uniq." suffix or + // not, the flag is set to prevent the compiler stripping the suffix. + addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagUniqSuffix); + } else { + // If NameTable contains ".__uniq." suffix, set SecFlagUniqSuffix flag + // so compiler won't strip the suffix during profile matching after + // seeing the flag in the profile. + for (const auto &I : NameTable) { + if (I.first.find(FunctionSamples::UniqSuffix) != StringRef::npos) { + addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagUniqSuffix); + break; + } } } @@ -388,9 +395,13 @@ /// it needs to be parsed by the SampleProfileReaderText class. std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) { auto &OS = *OutputStream; - if (FunctionSamples::ProfileIsCS) - OS << "[" << S.getNameWithContext() << "]:" << S.getTotalSamples(); - else + if (FunctionSamples::ProfileIsCS) { + if (FunctionSamples::UseMD5) + OS << "[" << S.getContext().getContextInMd5() + << "]:" << S.getTotalSamples(); + else + OS << "[" << S.getNameWithContext() << "]:" << S.getTotalSamples(); + } else OS << S.getName() << ":" << S.getTotalSamples(); if (Indent == 0) @@ -409,8 +420,14 @@ OS << Sample.getSamples(); - for (const auto &J : Sample.getSortedCallTargets()) - OS << " " << J.first << ":" << J.second; + if (FunctionSamples::ProfileIsCS && FunctionSamples::UseMD5) { + for (const auto &J : Sample.getSortedCallTargets()) + OS << " 0x" << Twine::utohexstr(FunctionSamples::getGUID(J.first)) + << ":" << J.second; + } else { + for (const auto &J : Sample.getSortedCallTargets()) + OS << " " << J.first << ":" << J.second; + } OS << "\n"; } @@ -448,9 +465,15 @@ std::error_code SampleProfileWriterBinary::writeNameIdx(StringRef FName, bool IsContextName) { std::string BracketedName; + if (FunctionSamples::ProfileIsCS && FunctionSamples::UseMD5) { + SampleContext Context(FName); + BracketedName = Context.getContextInMd5(); + FName = BracketedName; + } + if (IsContextName) { BracketedName = "[" + FName.str() + "]"; - FName = StringRef(BracketedName); + FName = BracketedName; } const auto &Ret = NameTable.find(FName); @@ -461,9 +484,18 @@ } void SampleProfileWriterBinary::addName(StringRef FName, bool IsContextName) { + std::string BracketedName; + if (FunctionSamples::ProfileIsCS && FunctionSamples::UseMD5) { + SampleContext Context(FName); + BracketedName = Context.getContextInMd5(); + FName = BracketedName; + } if (IsContextName) { auto It = BracketedContextStr.insert("[" + FName.str() + "]"); FName = StringRef(*It.first); + } else if (!BracketedName.empty()) { + auto It = BracketedContextStr.insert(BracketedName); + FName = StringRef(*It.first); } NameTable.insert(std::make_pair(FName, 0)); } diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp --- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -28,11 +28,11 @@ namespace llvm { ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite, - StringRef CalleeName) { - if (CalleeName.empty()) + uint64_t CalleeGUID) { + if (!CalleeGUID) return getHottestChildContext(CallSite); - uint32_t Hash = nodeHash(CalleeName, CallSite); + uint64_t Hash = nodeHash(CalleeGUID, CallSite); auto It = AllChildContext.find(Hash); if (It != AllChildContext.end()) return &It->second; @@ -64,8 +64,9 @@ ContextTrieNode &ContextTrieNode::moveToChildContext( const LineLocation &CallSite, ContextTrieNode &&NodeToMove, - StringRef ContextStrToRemove, bool DeleteNode) { - uint32_t Hash = nodeHash(NodeToMove.getFuncName(), CallSite); + StringRef ContextStrToRemove, SampleContextTracker &ContextTracker, + bool DeleteNode) { + auto Hash = nodeHash(NodeToMove.getFuncGUID(), CallSite); assert(!AllChildContext.count(Hash) && "Node to remove must exist"); LineLocation OldCallSite = NodeToMove.CallSiteLoc; ContextTrieNode &OldParentContext = *NodeToMove.getParentContext(); @@ -88,8 +89,10 @@ if (FSamples) { FSamples->getContext().promoteOnPath(ContextStrToRemove); FSamples->getContext().setState(SyntheticContext); - LLVM_DEBUG(dbgs() << " Context promoted to: " << FSamples->getContext() - << "\n"); + LLVM_DEBUG( + dbgs() << " Context promoted to: " + << ContextTracker.getContextInRealName(FSamples->getContext()) + << "\n"); } for (auto &It : Node->getAllChildContext()) { @@ -101,23 +104,23 @@ // Original context no longer needed, destroy if requested. if (DeleteNode) - OldParentContext.removeChildContext(OldCallSite, NewNode.getFuncName()); + OldParentContext.removeChildContext(OldCallSite, NewNode.getFuncGUID()); return NewNode; } void ContextTrieNode::removeChildContext(const LineLocation &CallSite, - StringRef CalleeName) { - uint32_t Hash = nodeHash(CalleeName, CallSite); + uint64_t CalleeGUID) { + uint64_t Hash = nodeHash(CalleeGUID, CallSite); // Note this essentially calls dtor and destroys that child context AllChildContext.erase(Hash); } -std::map &ContextTrieNode::getAllChildContext() { +std::map &ContextTrieNode::getAllChildContext() { return AllChildContext; } -StringRef ContextTrieNode::getFuncName() const { return FuncName; } +uint64_t ContextTrieNode::getFuncGUID() const { return GUID; } FunctionSamples *ContextTrieNode::getFunctionSamples() const { return FuncSamples; @@ -137,33 +140,38 @@ ParentContext = Parent; } -void ContextTrieNode::dump() { - dbgs() << "Node: " << FuncName << "\n" +void ContextTrieNode::dump(SampleContextTracker &ContextTracker) { + dbgs() << "Node: " << ContextTracker.getFuncNameFor(this) << "\n" << " Callsite: " << CallSiteLoc << "\n" - << " Children:\n"; + << " Address: " << format("%8" PRIx64, this) << "\n" + << " Parent: " << format("%8" PRIx64, ParentContext) << "\n"; + + if (FuncSamples) + dbgs() << " HasProfile\n"; + + dbgs() << " Children:\n"; for (auto &It : AllChildContext) { - dbgs() << " Node: " << It.second.getFuncName() << "\n"; + dbgs() << " Node: " << ContextTracker.getFuncNameFor(&It.second) << "\n"; } } -uint32_t ContextTrieNode::nodeHash(StringRef ChildName, +uint64_t ContextTrieNode::nodeHash(uint64_t ChildGUID, const LineLocation &Callsite) { // We still use child's name for child hash, this is // because for children of root node, we don't have // different line/discriminator, and we'll rely on name // to differentiate children. - uint32_t NameHash = std::hash{}(ChildName.str()); uint32_t LocId = (Callsite.LineOffset << 16) | Callsite.Discriminator; - return NameHash + (LocId << 5) + LocId; + return ChildGUID + (LocId << 5) + LocId; } ContextTrieNode *ContextTrieNode::getOrCreateChildContext( - const LineLocation &CallSite, StringRef CalleeName, bool AllowCreate) { - uint32_t Hash = nodeHash(CalleeName, CallSite); + const LineLocation &CallSite, uint64_t CalleeGUID, bool AllowCreate) { + uint64_t Hash = nodeHash(CalleeGUID, CallSite); auto It = AllChildContext.find(Hash); if (It != AllChildContext.end()) { - assert(It->second.getFuncName() == CalleeName && + assert(It->second.getFuncGUID() == CalleeGUID && "Hash collision for child context node"); return &It->second; } @@ -171,19 +179,49 @@ if (!AllowCreate) return nullptr; - AllChildContext[Hash] = ContextTrieNode(this, CalleeName, nullptr, CallSite); + AllChildContext[Hash] = ContextTrieNode(this, CalleeGUID, nullptr, CallSite); return &AllChildContext[Hash]; } +std::string SampleContextTracker::getContextInRealName(SampleContext &Context) { + std::string RealContext; + StringRef ContextRemain = Context; + StringRef ChildContext; + StringRef CalleeName; + while (!ContextRemain.empty()) { + auto ContextSplit = SampleContext::splitContextString(ContextRemain); + ChildContext = ContextSplit.first; + ContextRemain = ContextSplit.second; + LineLocation NextCallSiteLoc(0, 0); + SampleContext::decodeContextString(ChildContext, CalleeName, + NextCallSiteLoc); + if (FunctionSamples::isGUID(CalleeName)) + CalleeName = GUIDToFuncNameMap[FunctionSamples::getGUID(CalleeName)]; + if (!RealContext.empty()) + RealContext += " @ "; + RealContext += CalleeName; + if (NextCallSiteLoc.LineOffset) + RealContext += ":" + Twine(NextCallSiteLoc.LineOffset).str(); + if (NextCallSiteLoc.Discriminator) + RealContext += "." + Twine(NextCallSiteLoc.Discriminator).str(); + } + return RealContext; +} + // Profiler tracker than manages profiles and its associated context SampleContextTracker::SampleContextTracker( - StringMap &Profiles) { + StringMap &Profiles, + DenseMap &GUIDToFuncNameMap) + : GUIDToFuncNameMap(GUIDToFuncNameMap) { for (auto &FuncSample : Profiles) { FunctionSamples *FSamples = &FuncSample.second; SampleContext Context(FuncSample.first(), RawContext); - LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context << "\n"); - if (!Context.isBaseContext()) - FuncToCtxtProfiles[Context.getNameWithoutContext()].push_back(FSamples); + LLVM_DEBUG(dbgs() << "Tracking Context for function: " + << getContextInRealName(Context) << "\n"); + if (!Context.isBaseContext()) { + auto Hash = MD5Hash(Context.getContextInMd5()); + FuncToCtxtProfiles[Context.getGUID()].emplace(Hash, FSamples); + } ContextTrieNode *NewNode = getOrCreateContextPath(Context, true); assert(!NewNode->getFunctionSamples() && "New node can't have sample profile"); @@ -193,21 +231,20 @@ FunctionSamples * SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst, - StringRef CalleeName) { + uint64_t CalleeGUID) { LLVM_DEBUG(dbgs() << "Getting callee context for instr: " << Inst << "\n"); DILocation *DIL = Inst.getDebugLoc(); if (!DIL) return nullptr; - CalleeName = FunctionSamples::getCanonicalFnName(CalleeName); - // For indirect call, CalleeName will be empty, in which case the context // profile for callee with largest total samples will be returned. - ContextTrieNode *CalleeContext = getCalleeContextFor(DIL, CalleeName); + ContextTrieNode *CalleeContext = getCalleeContextFor(DIL, CalleeGUID); if (CalleeContext) { FunctionSamples *FSamples = CalleeContext->getFunctionSamples(); LLVM_DEBUG(if (FSamples) { - dbgs() << " Callee context found: " << FSamples->getContext() << "\n"; + dbgs() << " Callee context found: " + << getContextInRealName(FSamples->getContext()) << "\n"; }); return FSamples; } @@ -268,37 +305,41 @@ SampleContextTracker::ContextSamplesTy & SampleContextTracker::getAllContextSamplesFor(const Function &Func) { StringRef CanonName = FunctionSamples::getCanonicalFnName(Func); - return FuncToCtxtProfiles[CanonName]; + uint64_t GUID = Function::getGUID(CanonName); + return FuncToCtxtProfiles[GUID]; } SampleContextTracker::ContextSamplesTy & -SampleContextTracker::getAllContextSamplesFor(StringRef Name) { - return FuncToCtxtProfiles[Name]; +SampleContextTracker::getAllContextSamplesFor(uint64_t GUID) { + return FuncToCtxtProfiles[GUID]; } FunctionSamples *SampleContextTracker::getBaseSamplesFor(const Function &Func, bool MergeContext) { StringRef CanonName = FunctionSamples::getCanonicalFnName(Func); - return getBaseSamplesFor(CanonName, MergeContext); + uint64_t GUID = Function::getGUID(CanonName); + return getBaseSamplesFor(GUID, MergeContext); } -FunctionSamples *SampleContextTracker::getBaseSamplesFor(StringRef Name, +FunctionSamples *SampleContextTracker::getBaseSamplesFor(uint64_t GUID, bool MergeContext) { - LLVM_DEBUG(dbgs() << "Getting base profile for function: " << Name << "\n"); + LLVM_DEBUG(dbgs() << "Getting base profile for function: " + << GUIDToFuncNameMap[GUID] << "\n"); + // Base profile is top-level node (child of root node), so try to retrieve // existing top-level node for given function first. If it exists, it could be // that we've merged base profile before, or there's actually context-less // profile from the input (e.g. due to unreliable stack walking). - ContextTrieNode *Node = getTopLevelContextNode(Name); + ContextTrieNode *Node = getTopLevelContextNode(GUID); if (MergeContext) { - LLVM_DEBUG(dbgs() << " Merging context profile into base profile: " << Name - << "\n"); + LLVM_DEBUG(dbgs() << " Merging context profile into base profile: " + << GUIDToFuncNameMap[GUID] << "\n"); // We have profile for function under different contexts, // create synthetic base profile and merge context profiles // into base profile. - for (auto *CSamples : FuncToCtxtProfiles[Name]) { - SampleContext &Context = CSamples->getContext(); + for (auto &CSamples : FuncToCtxtProfiles[GUID]) { + SampleContext &Context = CSamples.second->getContext(); ContextTrieNode *FromNode = getContextFor(Context); if (FromNode == Node) continue; @@ -331,7 +372,7 @@ ContextTrieNode &SampleContextTracker::getRootContext() { return RootContext; } void SampleContextTracker::promoteMergeContextSamplesTree( - const Instruction &Inst, StringRef CalleeName) { + const Instruction &Inst, uint64_t CalleeGUID) { LLVM_DEBUG(dbgs() << "Promoting and merging context tree for instr: \n" << Inst << "\n"); // Get the caller context for the call instruction, we don't use callee @@ -345,7 +386,7 @@ LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL); // For indirect call, CalleeName will be empty, in which case we need to // promote all non-inlined child context profiles. - if (CalleeName.empty()) { + if (!CalleeGUID) { for (auto &It : CallerNode->getAllChildContext()) { ContextTrieNode *NodeToPromo = &It.second; if (CallSite != NodeToPromo->getCallSiteLoc()) @@ -360,7 +401,7 @@ // Get the context for the given callee that needs to be promoted ContextTrieNode *NodeToPromo = - CallerNode->getChildContext(CallSite, CalleeName); + CallerNode->getChildContext(CallSite, CalleeGUID); if (!NodeToPromo) return; @@ -376,7 +417,7 @@ FunctionSamples *FromSamples = NodeToPromo.getFunctionSamples(); assert(FromSamples && "Shouldn't promote a context without profile"); LLVM_DEBUG(dbgs() << " Found context tree root to promote: " - << FromSamples->getContext() << "\n"); + << getContextInRealName(FromSamples->getContext()) << "\n"); assert(!FromSamples->getContext().hasState(InlinedContext) && "Shouldn't promote inlined context profile"); @@ -385,6 +426,10 @@ ContextStrToRemove); } +StringRef SampleContextTracker::getFuncNameFor(ContextTrieNode *Node) const { + return GUIDToFuncNameMap[Node->getFuncGUID()]; +} + void SampleContextTracker::dump() { dbgs() << "Context Profile Tree:\n"; std::queue NodeQueue; @@ -393,7 +438,7 @@ while (!NodeQueue.empty()) { ContextTrieNode *Node = NodeQueue.front(); NodeQueue.pop(); - Node->dump(); + Node->dump(*this); for (auto &It : Node->getAllChildContext()) { ContextTrieNode *ChildNode = &It.second; @@ -409,7 +454,7 @@ ContextTrieNode * SampleContextTracker::getCalleeContextFor(const DILocation *DIL, - StringRef CalleeName) { + uint64_t CalleeGUID) { assert(DIL && "Expect non-null location"); ContextTrieNode *CallContext = getContextFor(DIL); @@ -419,12 +464,12 @@ // When CalleeName is empty, the child context profile with max // total samples will be returned. return CallContext->getChildContext( - FunctionSamples::getCallSiteIdentifier(DIL), CalleeName); + FunctionSamples::getCallSiteIdentifier(DIL), CalleeGUID); } ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) { assert(DIL && "Expect non-null location"); - SmallVector, 10> S; + SmallVector, 10> S; // Use C++ linkage name if possible. const DILocation *PrevDIL = DIL; @@ -432,8 +477,8 @@ StringRef Name = PrevDIL->getScope()->getSubprogram()->getLinkageName(); if (Name.empty()) Name = PrevDIL->getScope()->getSubprogram()->getName(); - S.push_back( - std::make_pair(FunctionSamples::getCallSiteIdentifier(DIL), Name)); + S.push_back(std::make_pair(FunctionSamples::getCallSiteIdentifier(DIL), + Function::getGUID(Name))); PrevDIL = DIL; } @@ -442,14 +487,14 @@ StringRef RootName = PrevDIL->getScope()->getSubprogram()->getLinkageName(); if (RootName.empty()) RootName = PrevDIL->getScope()->getSubprogram()->getName(); - S.push_back(std::make_pair(LineLocation(0, 0), RootName)); + S.push_back(std::make_pair(LineLocation(0, 0), Function::getGUID(RootName))); ContextTrieNode *ContextNode = &RootContext; int I = S.size(); while (--I >= 0 && ContextNode) { LineLocation &CallSite = S[I].first; - StringRef &CalleeName = S[I].second; - ContextNode = ContextNode->getChildContext(CallSite, CalleeName); + uint64_t &CalleeGUID = S[I].second; + ContextNode = ContextNode->getChildContext(CallSite, CalleeGUID); } if (I < 0) @@ -474,13 +519,14 @@ LineLocation NextCallSiteLoc(0, 0); SampleContext::decodeContextString(ChildContext, CalleeName, NextCallSiteLoc); + auto CalleeGUID = FunctionSamples::getGUID(CalleeName); // Create child node at parent line/disc location if (AllowCreate) { ContextNode = - ContextNode->getOrCreateChildContext(CallSiteLoc, CalleeName); + ContextNode->getOrCreateChildContext(CallSiteLoc, CalleeGUID); } else { - ContextNode = ContextNode->getChildContext(CallSiteLoc, CalleeName); + ContextNode = ContextNode->getChildContext(CallSiteLoc, CalleeGUID); } CallSiteLoc = NextCallSiteLoc; } @@ -490,14 +536,9 @@ return ContextNode; } -ContextTrieNode *SampleContextTracker::getTopLevelContextNode(StringRef FName) { - assert(!FName.empty() && "Top level node query must provide valid name"); - return RootContext.getChildContext(LineLocation(0, 0), FName); -} - -ContextTrieNode &SampleContextTracker::addTopLevelContextNode(StringRef FName) { - assert(!getTopLevelContextNode(FName) && "Node to add must not exist"); - return *RootContext.getOrCreateChildContext(LineLocation(0, 0), FName); +ContextTrieNode *SampleContextTracker::getTopLevelContextNode(uint64_t GUID) { + assert(GUID && "Top level node query must provide valid name"); + return RootContext.getChildContext(LineLocation(0, 0), GUID); } void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode, @@ -535,19 +576,21 @@ } // Locate destination node, create/move if not existing - ToNode = ToNodeParent.getChildContext(NewCallSiteLoc, FromNode.getFuncName()); + ToNode = ToNodeParent.getChildContext(NewCallSiteLoc, FromNode.getFuncGUID()); if (!ToNode) { // Do not delete node to move from its parent here because // caller is iterating over children of that parent node. ToNode = &ToNodeParent.moveToChildContext( - NewCallSiteLoc, std::move(FromNode), ContextStrToRemove, false); + NewCallSiteLoc, std::move(FromNode), ContextStrToRemove, *this, false); } else { // Destination node exists, merge samples for the context tree mergeContextNode(FromNode, *ToNode, ContextStrToRemove); LLVM_DEBUG({ if (ToNode->getFunctionSamples()) dbgs() << " Context promoted and merged to: " - << ToNode->getFunctionSamples()->getContext() << "\n"; + << getContextInRealName( + ToNode->getFunctionSamples()->getContext()) + << "\n"; }); // Recursively promote and merge children @@ -563,7 +606,7 @@ // For root of subtree, remove itself from old parent too if (MoveToRoot) - FromNodeParent.removeChildContext(OldCallSiteLoc, ToNode->getFuncName()); + FromNodeParent.removeChildContext(OldCallSiteLoc, ToNode->getFuncGUID()); return *ToNode; } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -246,7 +246,7 @@ DenseMap &GUIDToFuncNameMap) : CurrentReader(Reader), CurrentModule(M), CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) { - if (!CurrentReader.useMD5()) + if (!CurrentReader.useMD5() && !CurrentReader.profileIsCS()) return; for (const auto &F : CurrentModule) { @@ -631,8 +631,12 @@ if (Function *Callee = Inst.getCalledFunction()) CalleeName = Callee->getName(); - if (ProfileIsCS) - return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName); + if (ProfileIsCS) { + uint64_t CalleeGUID = 0; + if (!CalleeName.empty()) + CalleeGUID = Function::getGUID(CalleeName); + return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeGUID); + } const FunctionSamples *FS = findFunctionSamples(Inst); if (FS == nullptr) @@ -982,7 +986,7 @@ Function *Func = SymbolMap.lookup(Name); // Add to the import list only when it's defined out of module. if (!Func || Func->isDeclaration()) - InlinedGUIDs.insert(FunctionSamples::getGUID(Name)); + InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeSample->getName())); // Import hot CallTargets, which may not be available in IR because full // profile annotation cannot be done until backend compilation in ThinLTO. @@ -992,7 +996,7 @@ StringRef CalleeName = CalleeSample->getFuncName(TS.getKey()); const Function *Callee = SymbolMap.lookup(CalleeName); if (!Callee || Callee->isDeclaration()) - InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeName)); + InlinedGUIDs.insert(FunctionSamples::getGUID(TS.getKey())); } // Import hot child context profile associted with callees. Note that this @@ -1815,8 +1819,8 @@ UseIterativeBFIInference = true; // Tracker for profiles under different context - ContextTracker = - std::make_unique(Reader->getProfiles()); + ContextTracker = std::make_unique( + Reader->getProfiles(), GUIDToFuncNameMap); } // Load pseudo probe descriptors for probe-based function samples. diff --git a/llvm/test/Transforms/SampleProfile/Inputs/csspgo-import-list.md5.prof b/llvm/test/Transforms/SampleProfile/Inputs/csspgo-import-list.md5.prof new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/csspgo-import-list.md5.prof @@ -0,0 +1,34 @@ +[0xdb956436e78dd5fa:3 @ 0x3a529c5814aaf5e8]:23254:11 + 0: 10 + 1: 23250 + !Attributes: 0 +[0xdb956436e78dd5fa]:154:2 + 2: 12 + 3: 18 0x5790b5589256d455:11 + 3.1: 18 0x5790b5589256d455:19 + !Attributes: 0 +[0xdb956436e78dd5fa:3.1 @ 0x630ba95aaba8cb5]:120:7040 + 0: 7001 + 1: 19 0x62919f2827854931:9999 + 3: 12 + !Attributes: 0 +[0xdb956436e78dd5fa:2 @ 0x5790b5589256d455:2 @ 0xe4024261ef60ad54]:120:101 + 0: 99 + 1: 6 + 3: 97 + !Attributes: 0 +[0xdb956436e78dd5fa:2 @ 0x5790b5589256d455]:99:11 + 0: 10 + 1: 10 0x62919f2827854931:11 + 2: 287864 0xe4024261ef60ad54:315608 + 3: 24 + !Attributes: 0 +[0xdb956436e78dd5fa:3 @ 0x8848a048c0e66db8]:23:45201 + 0: 10 + 1: 23250 + !Attributes: 0 +[0xdb956436e78dd5fa:3.1 @ 0x630ba95aaba8cb5 @ 0x34f4c893b42f679a]:1:9010 + 0: 7001 + 1: 19 0xd847dc5c708801f8:9999 + 3: 12 + !Attributes: 0 \ No newline at end of file diff --git a/llvm/test/Transforms/SampleProfile/Inputs/indirect-call-csspgo-md5.prof b/llvm/test/Transforms/SampleProfile/Inputs/indirect-call-csspgo-md5.prof new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/indirect-call-csspgo-md5.prof @@ -0,0 +1,14 @@ +[0x73d32146cd6b8f09]:63067:0 + 1: 3345 0x7f8d88fcc70a347b:2059 0xf129122801e64264:1398 + 2: 100 0xd29e2fe34de9ae40:102 + 3: 100 0xdd875e8eb83dc5d6:102 + !Attributes: 0 +[0x73d32146cd6b8f09:1 @ 0x7f8d88fcc70a347b]:4220:1200 + 14: 4220 + !Attributes: 0 +[0x73d32146cd6b8f09:2 @ 0xd29e2fe34de9ae40]:200:100 + 5: 100 + !Attributes: 0 +[0x73d32146cd6b8f09:1 @ 0xf129122801e64264]:200:100 + 1: 100 + !Attributes: 0 \ No newline at end of file diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-context-tracker-md5.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-context-tracker-md5.prof new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-context-tracker-md5.prof @@ -0,0 +1,44 @@ +[0xdb956436e78dd5fa:3 @ 0x5790b5589256d455:1 @ 0x62919f2827854931]:1467299:11 + 0: 6 + 1: 6 + 3: 287884 + 4: 287864 0xe4024261ef60ad54:315608 + 15: 23 + !Attributes: 0 +[0xdb956436e78dd5fa:3.1 @ 0x630ba95aaba8cb5:1 @ 0x62919f2827854931]:500853:20 + 0: 15 + 1: 15 + 3: 74946 + 4: 74941 0xe4024261ef60ad54:82359 + 10: 23324 + 11: 23327 0xe4024261ef60ad54:25228 + 15: 11 + !Attributes: 0 +[0xdb956436e78dd5fa]:154:0 + 2: 12 + 3: 18 0x5790b5589256d455:11 + 3.1: 18 0x630ba95aaba8cb5:19 + !Attributes: 0 +[0x4881065a99b6216a:12 @ 0xdb956436e78dd5fa]:154:12 + 2: 12 + 3: 10 0x5790b5589256d455:7 + 3.1: 10 0x630ba95aaba8cb5:11 + !Attributes: 0 +[0xdb956436e78dd5fa:3.1 @ 0x630ba95aaba8cb5]:120:19 + 0: 19 + 1: 19 0x62919f2827854931:20 + 3: 12 + !Attributes: 0 +[0x4881065a99b6216a:10 @ 0x630ba95aaba8cb5]:120:10 + 0: 10 + 1: 10 + !Attributes: 0 +[0x2229d555d2aa470d:17 @ 0x630ba95aaba8cb5]:120:3 + 0: 3 + 1: 3 + !Attributes: 0 +[0xdb956436e78dd5fa:3 @ 0x5790b5589256d455]:99:11 + 0: 10 + 1: 10 0x62919f2827854931:11 + 3: 24 + !Attributes: 0 \ No newline at end of file diff --git a/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll b/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll @@ -1,6 +1,7 @@ ; Make sure Import GUID list for ThinLTO properly set for CSSPGO ; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list.prof -S | FileCheck %s ; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list.prof.extbin -S | FileCheck %s +; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list.md5.prof -S | FileCheck %s declare i32 @_Z5funcBi(i32 %x) declare i32 @_Z5funcAi(i32 %x) diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll @@ -2,6 +2,7 @@ ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-relative-hotness=1 -pass-remarks=sample-profile -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-ALL %s ; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-relative-hotness=1 -pass-remarks=sample-profile -sample-profile-inline-size=0 -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-HOT %s ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-relative-hotness=1 -pass-remarks=sample-profile -sample-profile-inline-size=0 -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-HOT %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo-md5.prof -sample-profile-icp-relative-hotness=1 -pass-remarks=sample-profile -sample-profile-inline-size=0 -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-HOT %s define void @test(void ()*) #0 !dbg !3 { ;; Add two direct call to force top-down order for sample profile loader diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll @@ -6,6 +6,7 @@ ; main:3 @ _Z5funcAi:1 @ _Z8funcLeafi ; _Z5funcBi:1 @ _Z8funcLeafi ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker-md5.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE ; ; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, so we get less inlining for given profile ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW diff --git a/llvm/test/tools/llvm-profdata/cs-sample-profile.test b/llvm/test/tools/llvm-profdata/cs-sample-profile.test --- a/llvm/test/tools/llvm-profdata/cs-sample-profile.test +++ b/llvm/test/tools/llvm-profdata/cs-sample-profile.test @@ -2,3 +2,48 @@ RUN: diff -b %t.proftext %S/Inputs/cs-sample.proftext RUN: llvm-profdata merge --sample --extbinary %p/Inputs/cs-sample.proftext -o %t.prof && llvm-profdata merge --sample --text %t.prof -o %t1.proftext RUN: diff -b %t1.proftext %S/Inputs/cs-sample.proftext +RUN: llvm-profdata merge --sample --text --use-md5 -output=%t1.md5.proftext %t.proftext +RUN: llvm-profdata merge --sample --text --use-md5 -output=%t2.md5.proftext %t.prof +RUN: llvm-profdata merge --sample --extbinary --use-md5 -output=%t3.md5.prof %t.prof +RUN: llvm-profdata merge --sample --text --use-md5 -output=%t4.md5.proftext %t3.md5.prof +RUN: cat %t1.md5.proftext | FileCheck %s +RUN: cat %t2.md5.proftext | FileCheck %s +RUN: cat %t4.md5.proftext | FileCheck %s + + +CHECK-DAG: [0xdb956436e78dd5fa:3 @ 0x5790b5589256d455:1 @ 0x62919f2827854931]:1467299:11 +CHECK-DAG-NEXT: 0: 6 +CHECK-DAG-NEXT: 1: 6 +CHECK-DAG-NEXT: 3: 287884 +CHECK-DAG-NEXT: 4: 287864 0xe4024261ef60ad54:315608 +CHECK-DAG-NEXT: 15: 23 +CHECK-DAG: [0xdb956436e78dd5fa:3.1 @ 0x630ba95aaba8cb5:1 @ 0x62919f2827854931]:500853:20 +CHECK-DAG-NEXT: 0: 15 +CHECK-DAG-NEXT: 1: 15 +CHECK-DAG-NEXT: 3: 74946 +CHECK-DAG-NEXT: 4: 74941 0xe4024261ef60ad54:82359 +CHECK-DAG-NEXT: 10: 23324 +CHECK-DAG-NEXT: 11: 23327 0xe4024261ef60ad54:25228 +CHECK-DAG-NEXT: 15: 11 +CHECK-DAG: [0xdb956436e78dd5fa]:154:0 +CHECK-DAG-NEXT: 2: 12 +CHECK-DAG-NEXT: 3: 18 0x5790b5589256d455:11 +CHECK-DAG-NEXT: 3.1: 18 0x630ba95aaba8cb5:19 +CHECK-DAG: [0x4881065a99b6216a:12 @ 0xdb956436e78dd5fa]:154:12 +CHECK-DAG-NEXT: 2: 12 +CHECK-DAG-NEXT: 3: 10 0x5790b5589256d455:7 +CHECK-DAG-NEXT: 3.1: 10 0x630ba95aaba8cb5:11 +CHECK-DAG: [0xdb956436e78dd5fa:3.1 @ 0x630ba95aaba8cb5]:120:19 +CHECK-DAG-NEXT: 0: 19 +CHECK-DAG-NEXT: 1: 19 0x62919f2827854931:20 +CHECK-DAG-NEXT: 3: 12 +CHECK-DAG: [0x2229d555d2aa470d:17 @ 0x630ba95aaba8cb5]:120:3 +CHECK-DAG-NEXT: 0: 3 +CHECK-DAG-NEXT: 1: 3 +CHECK-DAG: [0x4881065a99b6216a:10 @ 0x630ba95aaba8cb5]:120:10 +CHECK-DAG-NEXT: 0: 10 +CHECK-DAG-NEXT: 1: 10 +CHECK-DAG: [0xdb956436e78dd5fa:3 @ 0x5790b5589256d455]:99:11 +CHECK-DAG-NEXT: 0: 10 +CHECK-DAG-NEXT: 1: 10 0x62919f2827854931:11 +CHECK-DAG-NEXT: 3: 24 diff --git a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test --- a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test +++ b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test @@ -1,5 +1,7 @@ ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 | FileCheck %s --check-prefix=CHECK-UNWINDER ; RUN: FileCheck %s --input-file %t +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t2 --show-unwinder-output --profile-summary-cold-count=0 --use-md5 +; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-MD5 ; CHECK:[main:1 @ foo]:54:0 ; CHECK: 2: 3 @@ -11,6 +13,16 @@ ; CHECK: 4: 1 ; CHECK: 5: 3 +; CHECK-MD5:[0xdb956436e78dd5fa:1 @ 0x5cf8c24cdb18bdac]:54:0 +; CHECK-MD5: 2: 3 +; CHECK-MD5: 3: 3 0xe413754a191db537:3 +; CHECK-MD5:[0xdb956436e78dd5fa:1 @ 0x5cf8c24cdb18bdac:3 @ 0xe413754a191db537]:50:3 +; CHECK-MD5: 0: 3 +; CHECK-MD5: 1: 3 +; CHECK-MD5: 2: 2 +; CHECK-MD5: 4: 1 +; CHECK-MD5: 5: 3 + ; CHECK-UNWINDER: Binary(noinline-cs-noprobe.perfbin)'s Range Counter: ; CHECK-UNWINDER: main:1 @ foo ; CHECK-UNWINDER: (5ff, 62f): 3 diff --git a/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test b/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test --- a/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test +++ b/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test @@ -1,5 +1,7 @@ ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 | FileCheck %s --check-prefix=CHECK-UNWINDER ; RUN: FileCheck %s --input-file %t +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t2 --show-unwinder-output --profile-summary-cold-count=0 --use-md5 +; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-MD5 ; CHECK: [main:2 @ foo]:75:0 ; CHECK-NEXT: 1: 0 @@ -18,6 +20,23 @@ ; CHECK-NEXT: !CFGChecksum: 72617220756 +; CHECK-MD5: [0xdb956436e78dd5fa:2 @ 0x5cf8c24cdb18bdac]:75:0 +; CHECK-MD5-NEXT: 1: 0 +; CHECK-MD5-NEXT: 2: 15 +; CHECK-MD5-NEXT: 3: 15 +; CHECK-MD5-NEXT: 4: 15 +; CHECK-MD5-NEXT: 5: 0 +; CHECK-MD5-NEXT: 6: 15 +; CHECK-MD5-NEXT: 7: 0 +; CHECK-MD5-NEXT: 8: 15 0xe413754a191db537:15 +; CHECK-MD5-NEXT: 9: 0 +; CHECK-MD5-NEXT: !CFGChecksum: 563088904013236 +; CHECK-MD5: [0xdb956436e78dd5fa:2 @ 0x5cf8c24cdb18bdac:8 @ 0xe413754a191db537]:30:15 +; CHECK-MD5-NEXT: 1: 15 +; CHECK-MD5-NEXT: 4: 15 +; CHECK-MD5-NEXT: !CFGChecksum: 72617220756 + + ; CHECK-UNWINDER: Binary(noinline-cs-pseudoprobe.perfbin)'s Range Counter: ; CHECK-UNWINDER-NEXT: main:2 ; CHECK-UNWINDER-NEXT: (79e, 7bf): 15 diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -655,6 +655,7 @@ sampleprof::ProfileSymbolList &WriterList, bool CompressAllSections, bool UseMD5, bool GenPartialProfile) { + using namespace sampleprof; populateProfileSymbolList(Buffer, WriterList); if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary) warn("Profile Symbol list is not empty but the output format is not " @@ -669,7 +670,9 @@ Writer.setToCompressAllSections(); } if (UseMD5) { - if (OutputFormat != PF_Ext_Binary) + if (FunctionSamples::ProfileIsCS) + FunctionSamples::UseMD5 = true; + else if (OutputFormat != PF_Ext_Binary) warn("-use-md5 is ignored. Specify -extbinary to enable it"); else Writer.setUseMD5(); diff --git a/llvm/tools/llvm-profgen/CSPreInliner.h b/llvm/tools/llvm-profgen/CSPreInliner.h --- a/llvm/tools/llvm-profgen/CSPreInliner.h +++ b/llvm/tools/llvm-profgen/CSPreInliner.h @@ -79,6 +79,7 @@ bool shouldInline(ProfiledInlineCandidate &Candidate); SampleContextTracker ContextTracker; StringMap &ProfileMap; + DenseMap GUIDToFuncNameMap; // Count thresholds to answer isHotCount and isColdCount queries. // Mirrors the threshold in ProfileSummaryInfo. diff --git a/llvm/tools/llvm-profgen/CSPreInliner.cpp b/llvm/tools/llvm-profgen/CSPreInliner.cpp --- a/llvm/tools/llvm-profgen/CSPreInliner.cpp +++ b/llvm/tools/llvm-profgen/CSPreInliner.cpp @@ -32,8 +32,26 @@ CSPreInliner::CSPreInliner(StringMap &Profiles, uint64_t HotThreshold, uint64_t ColdThreshold) - : ContextTracker(Profiles), ProfileMap(Profiles), - HotCountThreshold(HotThreshold), ColdCountThreshold(ColdThreshold) {} + : ContextTracker(Profiles, GUIDToFuncNameMap), ProfileMap(Profiles), + HotCountThreshold(HotThreshold), ColdCountThreshold(ColdThreshold) { + // Populate GUIDToFuncNameMap + for (auto &FuncSample : Profiles) { + SampleContext Context(FuncSample.first(), RawContext); + StringRef ContextRemain = Context; + StringRef ChildContext; + StringRef CalleeName; + while (!ContextRemain.empty()) { + auto ContextSplit = SampleContext::splitContextString(ContextRemain); + ChildContext = ContextSplit.first; + ContextRemain = ContextSplit.second; + LineLocation NextCallSiteLoc(0, 0); + SampleContext::decodeContextString(ChildContext, CalleeName, + NextCallSiteLoc); + auto CalleeGUID = FunctionSamples::getGUID(CalleeName); + GUIDToFuncNameMap[CalleeGUID] = CalleeName; + } + } +} std::vector CSPreInliner::buildTopDownOrder() { std::vector Order; @@ -118,7 +136,8 @@ LLVM_DEBUG(dbgs() << "Process " << Name << " for context-sensitive pre-inlining\n"); - FunctionSamples *FSamples = ContextTracker.getBaseSamplesFor(Name); + FunctionSamples *FSamples = + ContextTracker.getBaseSamplesFor(FunctionSamples::getGUID(Name)); if (!FSamples) return; diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -25,6 +25,10 @@ clEnumValN(SPF_GCC, "gcc", "GCC encoding (only meaningful for -sample)"))); +cl::opt UseMD5( + "use-md5", cl::Hidden, cl::init(false), + cl::desc("Use md5 to represent function names in the output profile")); + static cl::opt RecursionCompression( "compress-recursion", cl::desc("Compressing recursion by deduplicating adjacent frame " @@ -220,6 +224,7 @@ void CSProfileGenerator::generateProfile() { FunctionSamples::ProfileIsCS = true; + FunctionSamples::UseMD5 = UseMD5; for (const auto &BI : BinarySampleCounters) { ProfiledBinary *Binary = BI.first; for (const auto &CI : BI.second) { @@ -454,6 +459,7 @@ // Enable pseudo probe functionalities in SampleProf FunctionSamples::ProfileIsProbeBased = true; FunctionSamples::ProfileIsCS = true; + FunctionSamples::UseMD5 = UseMD5; for (const auto &BI : BinarySampleCounters) { ProfiledBinary *Binary = BI.first; for (const auto &CI : BI.second) {