diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -553,16 +553,6 @@ } } - // Promote context by removing top frames with the length of - // `ContextFramesToRemove`. Note that with array representation of context, - // the promotion is effectively a slice operation with first - // `ContextFramesToRemove` elements removed from left. - void promoteOnPath(uint32_t ContextFramesToRemove) { - assert(ContextFramesToRemove <= FullContext.size() && - "Cannot remove more than the whole context"); - FullContext = FullContext.drop_front(ContextFramesToRemove); - } - // Decode context string for a frame to get function name and location. // `ContextStr` is in the form of `FuncName:StartLine.Discriminator`. static void decodeContextString(StringRef ContextStr, StringRef &FName, diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h --- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h +++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h @@ -19,6 +19,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ProfileData/SampleProf.h" #include +#include #include namespace llvm { @@ -44,11 +45,6 @@ ContextTrieNode *getOrCreateChildContext(const LineLocation &CallSite, StringRef ChildName, bool AllowCreate = true); - - ContextTrieNode &moveToChildContext(const LineLocation &CallSite, - ContextTrieNode &&NodeToMove, - uint32_t ContextFramesToRemove, - bool DeleteNode = true); void removeChildContext(const LineLocation &CallSite, StringRef ChildName); std::map &getAllChildContext(); StringRef getFuncName() const; @@ -59,6 +55,7 @@ LineLocation getCallSiteLoc() const; ContextTrieNode *getParentContext() const; void setParentContext(ContextTrieNode *Parent); + void setCallSiteLoc(const LineLocation &Loc); void dumpNode(); void dumpTree(); @@ -91,23 +88,13 @@ // calling context and the context is identified by path from root to the node. class SampleContextTracker { public: - struct ProfileComparer { - bool operator()(FunctionSamples *A, FunctionSamples *B) const { - // Sort function profiles by the number of total samples and their - // contexts. - if (A->getTotalSamples() == B->getTotalSamples()) - return A->getContext() < B->getContext(); - return A->getTotalSamples() > B->getTotalSamples(); - } - }; - - // Keep profiles of a function sorted so that they will be processed/promoted - // deterministically. - using ContextSamplesTy = std::set; + using ContextSamplesTy = std::vector; SampleContextTracker() = default; SampleContextTracker(SampleProfileMap &Profiles, const DenseMap *GUIDToFuncNameMap); + // Populate the FuncToCtxtProfiles map after the trie is built. + void populateFuncToCtxtMap(); // Query context profile for a specific callee with given name at a given // call-site. The full context is identified by location of call instruction. FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst, @@ -145,6 +132,61 @@ // Create a merged conext-less profile map. void createContextLessProfileMap(SampleProfileMap &ContextLessProfiles); + ContextTrieNode * + getContextNodeForProfile(const FunctionSamples *FSamples) const { + auto I = ProfileToNodeMap.find(FSamples); + if (I == ProfileToNodeMap.end()) + return nullptr; + return I->second; + } + StringMap &getFuncToCtxtProfiles() { + return FuncToCtxtProfiles; + } + + class Iterator : public std::iterator { + std::queue NodeQueue; + + public: + explicit Iterator() = default; + explicit Iterator(ContextTrieNode *Node) { NodeQueue.push(Node); } + Iterator &operator++() { + assert(!NodeQueue.empty() && "Iterator already at the end"); + ContextTrieNode *Node = NodeQueue.front(); + NodeQueue.pop(); + for (auto &It : Node->getAllChildContext()) + NodeQueue.push(&It.second); + return *this; + } + + Iterator operator++(int) { + assert(!NodeQueue.empty() && "Iterator already at the end"); + Iterator Ret = *this; + ++(*this); + return Ret; + } + bool operator==(const Iterator &Other) const { + if (NodeQueue.empty() && Other.NodeQueue.empty()) + return true; + if (NodeQueue.empty() || Other.NodeQueue.empty()) + return false; + return NodeQueue.front() == Other.NodeQueue.front(); + } + bool operator!=(const Iterator &Other) const { return !(*this == Other); } + ContextTrieNode *operator*() const { + assert(!NodeQueue.empty() && "Invalid access to end iterator"); + return NodeQueue.front(); + } + }; + + Iterator begin() { return Iterator(&RootContext); } + Iterator end() { return Iterator(); } + +#ifndef NDEBUG + // Get a context string from root to current node. + std::string getContextString(const FunctionSamples &FSamples) const; + std::string getContextString(ContextTrieNode *Node) const; +#endif // Dump the internal context profile trie. void dump(); @@ -155,15 +197,23 @@ ContextTrieNode *getTopLevelContextNode(StringRef FName); ContextTrieNode &addTopLevelContextNode(StringRef FName); ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &NodeToPromo); - void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode, - uint32_t ContextFramesToRemove); + void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode); ContextTrieNode & promoteMergeContextSamplesTree(ContextTrieNode &FromNode, - ContextTrieNode &ToNodeParent, - uint32_t ContextFramesToRemove); + ContextTrieNode &ToNodeParent); + ContextTrieNode &moveContextSamples(ContextTrieNode &ToNodeParent, + const LineLocation &CallSite, + ContextTrieNode &&NodeToMove); + void setContextNode(const FunctionSamples *FSample, ContextTrieNode *Node) { + ProfileToNodeMap[FSample] = Node; + } // Map from function name to context profiles (excluding base profile) StringMap FuncToCtxtProfiles; + // Map from current FunctionSample to the belonged context trie. + std::unordered_map + ProfileToNodeMap; + // Map from function guid to real function names. Only used in md5 mode. const DenseMap *GUIDToFuncNameMap; diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp --- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -63,23 +63,24 @@ return ChildNodeRet; } -ContextTrieNode &ContextTrieNode::moveToChildContext( - const LineLocation &CallSite, ContextTrieNode &&NodeToMove, - uint32_t ContextFramesToRemove, bool DeleteNode) { +ContextTrieNode & +SampleContextTracker::moveContextSamples(ContextTrieNode &ToNodeParent, + const LineLocation &CallSite, + ContextTrieNode &&NodeToMove) { uint64_t Hash = FunctionSamples::getCallSiteHash(NodeToMove.getFuncName(), CallSite); + std::map &AllChildContext = + ToNodeParent.getAllChildContext(); assert(!AllChildContext.count(Hash) && "Node to remove must exist"); - LineLocation OldCallSite = NodeToMove.CallSiteLoc; - ContextTrieNode &OldParentContext = *NodeToMove.getParentContext(); AllChildContext[Hash] = NodeToMove; ContextTrieNode &NewNode = AllChildContext[Hash]; - NewNode.CallSiteLoc = CallSite; + NewNode.setCallSiteLoc(CallSite); // Walk through nodes in the moved the subtree, and update // FunctionSamples' context as for the context promotion. // We also need to set new parant link for all children. std::queue NodeToUpdate; - NewNode.setParentContext(this); + NewNode.setParentContext(&ToNodeParent); NodeToUpdate.push(&NewNode); while (!NodeToUpdate.empty()) { @@ -88,10 +89,8 @@ FunctionSamples *FSamples = Node->getFunctionSamples(); if (FSamples) { - FSamples->getContext().promoteOnPath(ContextFramesToRemove); + setContextNode(FSamples, Node); FSamples->getContext().setState(SyntheticContext); - LLVM_DEBUG(dbgs() << " Context promoted to: " - << FSamples->getContext().toString() << "\n"); } for (auto &It : Node->getAllChildContext()) { @@ -101,10 +100,6 @@ } } - // Original context no longer needed, destroy if requested. - if (DeleteNode) - OldParentContext.removeChildContext(OldCallSite, NewNode.getFuncName()); - return NewNode; } @@ -148,6 +143,10 @@ ParentContext = Parent; } +void ContextTrieNode::setCallSiteLoc(const LineLocation &Loc) { + CallSiteLoc = Loc; +} + void ContextTrieNode::dumpNode() { dbgs() << "Node: " << FuncName << "\n" << " Callsite: " << CallSiteLoc << "\n" @@ -203,13 +202,23 @@ SampleContext Context = FuncSample.first; LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context.toString() << "\n"); - if (!Context.isBaseContext()) - FuncToCtxtProfiles[Context.getName()].insert(FSamples); ContextTrieNode *NewNode = getOrCreateContextPath(Context, true); assert(!NewNode->getFunctionSamples() && "New node can't have sample profile"); NewNode->setFunctionSamples(FSamples); } + populateFuncToCtxtMap(); +} + +void SampleContextTracker::populateFuncToCtxtMap() { + for (auto *Node : *this) { + FunctionSamples *FSamples = Node->getFunctionSamples(); + if (FSamples) { + FSamples->getContext().setState(RawContext); + setContextNode(FSamples, Node); + FuncToCtxtProfiles[Node->getFuncName()].push_back(FSamples); + } + } } FunctionSamples * @@ -232,7 +241,7 @@ if (CalleeContext) { FunctionSamples *FSamples = CalleeContext->getFunctionSamples(); LLVM_DEBUG(if (FSamples) { - dbgs() << " Callee context found: " << FSamples->getContext().toString() + dbgs() << " Callee context found: " << getContextString(CalleeContext) << "\n"; }); return FSamples; @@ -334,7 +343,7 @@ if (Context.hasState(InlinedContext) || Context.hasState(MergedContext)) continue; - ContextTrieNode *FromNode = getContextFor(Context); + ContextTrieNode *FromNode = getContextNodeForProfile(CSamples); if (FromNode == Node) continue; @@ -355,7 +364,7 @@ const FunctionSamples *InlinedSamples) { assert(InlinedSamples && "Expect non-null inlined samples"); LLVM_DEBUG(dbgs() << "Marking context profile as inlined: " - << InlinedSamples->getContext().toString() << "\n"); + << getContextString(*InlinedSamples) << "\n"); InlinedSamples->getContext().setState(InlinedContext); } @@ -407,15 +416,39 @@ FunctionSamples *FromSamples = NodeToPromo.getFunctionSamples(); assert(FromSamples && "Shouldn't promote a context without profile"); LLVM_DEBUG(dbgs() << " Found context tree root to promote: " - << FromSamples->getContext().toString() << "\n"); + << getContextString(&NodeToPromo) << "\n"); assert(!FromSamples->getContext().hasState(InlinedContext) && "Shouldn't promote inlined context profile"); - uint32_t ContextFramesToRemove = - FromSamples->getContext().getContextFrames().size() - 1; - return promoteMergeContextSamplesTree(NodeToPromo, RootContext, - ContextFramesToRemove); + return promoteMergeContextSamplesTree(NodeToPromo, RootContext); +} + +#ifndef NDEBUG +std::string +SampleContextTracker::getContextString(const FunctionSamples &FSamples) const { + return getContextString(getContextNodeForProfile(&FSamples)); +} + +std::string +SampleContextTracker::getContextString(ContextTrieNode *Node) const { + SampleContextFrameVector Res; + if (Node == &RootContext) + return std::string(); + Res.emplace_back(Node->getFuncName(), LineLocation(0, 0)); + + ContextTrieNode *PreNode = Node; + Node = Node->getParentContext(); + while (Node && Node != &RootContext) { + Res.emplace_back(Node->getFuncName(), PreNode->getCallSiteLoc()); + PreNode = Node; + Node = Node->getParentContext(); + } + + std::reverse(Res.begin(), Res.end()); + + return SampleContext::getContextString(Res); } +#endif void SampleContextTracker::dump() { RootContext.dumpTree(); } @@ -527,8 +560,7 @@ } void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode, - ContextTrieNode &ToNode, - uint32_t ContextFramesToRemove) { + ContextTrieNode &ToNode) { FunctionSamples *FromSamples = FromNode.getFunctionSamples(); FunctionSamples *ToSamples = ToNode.getFunctionSamples(); if (FromSamples && ToSamples) { @@ -541,16 +573,13 @@ } else if (FromSamples) { // Transfer FromSamples from FromNode to ToNode ToNode.setFunctionSamples(FromSamples); + setContextNode(FromSamples, &ToNode); FromSamples->getContext().setState(SyntheticContext); - FromSamples->getContext().promoteOnPath(ContextFramesToRemove); - FromNode.setFunctionSamples(nullptr); } } ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree( - ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent, - uint32_t ContextFramesToRemove) { - assert(ContextFramesToRemove && "Context to remove can't be empty"); + ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent) { // Ignore call site location if destination is top level under root LineLocation NewCallSiteLoc = LineLocation(0, 0); @@ -567,22 +596,25 @@ if (!ToNode) { // Do not delete node to move from its parent here because // caller is iterating over children of that parent node. - ToNode = &ToNodeParent.moveToChildContext( - NewCallSiteLoc, std::move(FromNode), ContextFramesToRemove, false); + ToNode = + &moveContextSamples(ToNodeParent, NewCallSiteLoc, std::move(FromNode)); + LLVM_DEBUG({ + dbgs() << " Context promoted and merged to: " << getContextString(ToNode) + << "\n"; + }); } else { // Destination node exists, merge samples for the context tree - mergeContextNode(FromNode, *ToNode, ContextFramesToRemove); + mergeContextNode(FromNode, *ToNode); LLVM_DEBUG({ if (ToNode->getFunctionSamples()) dbgs() << " Context promoted and merged to: " - << ToNode->getFunctionSamples()->getContext().toString() << "\n"; + << getContextString(ToNode) << "\n"; }); // Recursively promote and merge children for (auto &It : FromNode.getAllChildContext()) { ContextTrieNode &FromChildNode = It.second; - promoteMergeContextSamplesTree(FromChildNode, *ToNode, - ContextFramesToRemove); + promoteMergeContextSamplesTree(FromChildNode, *ToNode); } // Remove children once they're all merged @@ -598,21 +630,11 @@ void SampleContextTracker::createContextLessProfileMap( SampleProfileMap &ContextLessProfiles) { - std::queue NodeQueue; - NodeQueue.push(&RootContext); - - while (!NodeQueue.empty()) { - ContextTrieNode *Node = NodeQueue.front(); + for (auto *Node : *this) { FunctionSamples *FProfile = Node->getFunctionSamples(); - NodeQueue.pop(); - - if (FProfile) { - // Profile's context can be empty, use ContextNode's func name. + // Profile's context can be empty, use ContextNode's func name. + if (FProfile) ContextLessProfiles[Node->getFuncName()].merge(*FProfile); - } - - for (auto &It : Node->getAllChildContext()) - NodeQueue.push(&It.second); } } } // namespace llvm diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1073,8 +1073,7 @@ return; } - ContextTrieNode *Caller = - ContextTracker->getContextFor(Samples->getContext()); + ContextTrieNode *Caller = ContextTracker->getContextNodeForProfile(Samples); std::queue CalleeList; CalleeList.push(Caller); while (!CalleeList.empty()) { diff --git a/llvm/tools/llvm-profgen/CSPreInliner.h b/llvm/tools/llvm-profgen/CSPreInliner.h --- a/llvm/tools/llvm-profgen/CSPreInliner.h +++ b/llvm/tools/llvm-profgen/CSPreInliner.h @@ -67,7 +67,7 @@ // size by only keep context that is estimated to be inlined. class CSPreInliner { public: - CSPreInliner(SampleProfileMap &Profiles, ProfiledBinary &Binary, + CSPreInliner(SampleContextTracker &Tracker, ProfiledBinary &Binary, ProfileSummary *Summary); void run(); @@ -77,10 +77,9 @@ std::vector buildTopDownOrder(); void processFunction(StringRef Name); bool shouldInline(ProfiledInlineCandidate &Candidate); - uint32_t getFuncSize(const FunctionSamples &FSamples); + uint32_t getFuncSize(const ContextTrieNode *ContextNode); bool UseContextCost; - SampleContextTracker ContextTracker; - SampleProfileMap &ProfileMap; + SampleContextTracker &ContextTracker; ProfiledBinary &Binary; ProfileSummary *Summary; }; diff --git a/llvm/tools/llvm-profgen/CSPreInliner.cpp b/llvm/tools/llvm-profgen/CSPreInliner.cpp --- a/llvm/tools/llvm-profgen/CSPreInliner.cpp +++ b/llvm/tools/llvm-profgen/CSPreInliner.cpp @@ -55,14 +55,13 @@ cl::desc( "Replay previous inlining and adjust context profile accordingly")); -CSPreInliner::CSPreInliner(SampleProfileMap &Profiles, ProfiledBinary &Binary, - ProfileSummary *Summary) +CSPreInliner::CSPreInliner(SampleContextTracker &Tracker, + ProfiledBinary &Binary, ProfileSummary *Summary) : UseContextCost(UseContextCostForPreInliner), // TODO: Pass in a guid-to-name map in order for // ContextTracker.getFuncNameFor to work, if `Profiles` can have md5 codes // as their profile context. - ContextTracker(Profiles, nullptr), ProfileMap(Profiles), Binary(Binary), - Summary(Summary) { + ContextTracker(Tracker), Binary(Binary), Summary(Summary) { // Set default preinliner hot/cold call site threshold tuned with CSSPGO. // for good performance with reasonable profile size. if (!SampleHotCallSiteThreshold.getNumOccurrences()) @@ -107,7 +106,7 @@ // current one in the trie is relavent. So we walk the trie instead of call // targets from function profile. ContextTrieNode *CallerNode = - ContextTracker.getContextFor(CallerSamples->getContext()); + ContextTracker.getContextNodeForProfile(CallerSamples); bool HasNewCandidate = false; for (auto &Child : CallerNode->getAllChildContext()) { @@ -131,7 +130,7 @@ // TODO: call site and callee entry count should be mostly consistent, add // check for that. HasNewCandidate = true; - uint32_t CalleeSize = getFuncSize(*CalleeSamples); + uint32_t CalleeSize = getFuncSize(CalleeNode); CQueue.emplace(CalleeSamples, std::max(CallsiteCount, CalleeEntryCount), CalleeSize); } @@ -139,12 +138,11 @@ return HasNewCandidate; } -uint32_t CSPreInliner::getFuncSize(const FunctionSamples &FSamples) { - if (UseContextCost) { - return Binary.getFuncSizeForContext(FSamples.getContext()); - } +uint32_t CSPreInliner::getFuncSize(const ContextTrieNode *ContextNode) { + if (UseContextCost) + return Binary.getFuncSizeForContext(ContextNode); - return FSamples.getBodySamples().size(); + return ContextNode->getFunctionSamples()->getBodySamples().size(); } bool CSPreInliner::shouldInline(ProfiledInlineCandidate &Candidate) { @@ -189,7 +187,8 @@ if (!FSamples) return; - unsigned FuncSize = getFuncSize(*FSamples); + unsigned FuncSize = + getFuncSize(ContextTracker.getContextNodeForProfile(FSamples)); unsigned FuncFinalSize = FuncSize; unsigned SizeLimit = FuncSize * ProfileInlineGrowthLimit; SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax); @@ -218,11 +217,12 @@ } else { ++PreInlNumCSNotInlined; } - LLVM_DEBUG(dbgs() << (ShouldInline ? " Inlined" : " Outlined") - << " context profile for: " - << Candidate.CalleeSamples->getContext().toString() - << " (callee size: " << Candidate.SizeCost - << ", call count:" << Candidate.CallsiteCount << ")\n"); + LLVM_DEBUG( + dbgs() << (ShouldInline ? " Inlined" : " Outlined") + << " context profile for: " + << ContextTracker.getContextString(*Candidate.CalleeSamples) + << " (callee size: " << Candidate.SizeCost + << ", call count:" << Candidate.CallsiteCount << ")\n"); } if (!CQueue.empty()) { @@ -246,7 +246,8 @@ CQueue.pop(); bool WasInlined = Candidate.CalleeSamples->getContext().hasAttribute(ContextWasInlined); - dbgs() << " " << Candidate.CalleeSamples->getContext().toString() + dbgs() << " " + << ContextTracker.getContextString(*Candidate.CalleeSamples) << " (candidate size:" << Candidate.SizeCost << ", call count: " << Candidate.CallsiteCount << ", previously " << (WasInlined ? "inlined)\n" : "not inlined)\n"); @@ -256,19 +257,24 @@ void CSPreInliner::run() { #ifndef NDEBUG - auto printProfileNames = [](SampleProfileMap &Profiles, bool IsInput) { - dbgs() << (IsInput ? "Input" : "Output") << " context-sensitive profiles (" - << Profiles.size() << " total):\n"; - for (auto &It : Profiles) { - const FunctionSamples &Samples = It.second; - dbgs() << " [" << Samples.getContext().toString() << "] " - << Samples.getTotalSamples() << ":" << Samples.getHeadSamples() - << "\n"; + auto printProfileNames = [](SampleContextTracker &ContextTracker, + bool IsInput) { + uint32_t Size = 0; + for (auto *Node : ContextTracker) { + FunctionSamples *FSamples = Node->getFunctionSamples(); + if (FSamples) { + Size++; + dbgs() << " [" << ContextTracker.getContextString(Node) << "] " + << FSamples->getTotalSamples() << ":" + << FSamples->getHeadSamples() << "\n"; + } } + dbgs() << (IsInput ? "Input" : "Output") << " context-sensitive profiles (" + << Size << " total):\n"; }; #endif - LLVM_DEBUG(printProfileNames(ProfileMap, true)); + LLVM_DEBUG(printProfileNames(ContextTracker, true)); // Execute global pre-inliner to estimate a global top-down inline // decision and merge profiles accordingly. This helps with profile @@ -283,24 +289,15 @@ // Not inlined context profiles are merged into its base, so we can // trim out such profiles from the output. - std::vector ProfilesToBeRemoved; - for (auto &It : ProfileMap) { - SampleContext &Context = It.second.getContext(); - if (!Context.isBaseContext() && !Context.hasState(InlinedContext)) { - assert(Context.hasState(MergedContext) && - "Not inlined context profile should be merged already"); - ProfilesToBeRemoved.push_back(It.first); + for (auto *Node : ContextTracker) { + FunctionSamples *FProfile = Node->getFunctionSamples(); + if (FProfile && + (Node->getParentContext() != &ContextTracker.getRootContext() && + !FProfile->getContext().hasState(InlinedContext))) { + Node->setFunctionSamples(nullptr); } } - - for (auto &ContextName : ProfilesToBeRemoved) { - ProfileMap.erase(ContextName); - } - - // Make sure ProfileMap's key is consistent with FunctionSamples' name. - SampleContextTrimmer(ProfileMap).canonicalizeContextProfiles(); - FunctionSamples::ProfileIsPreInlined = true; - LLVM_DEBUG(printProfileNames(ProfileMap, false)); + LLVM_DEBUG(printProfileNames(ContextTracker, false)); } diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -456,18 +456,10 @@ bool CSProfileGenerator::collectFunctionsFromLLVMProfile( std::unordered_set &ProfiledFunctions) { - std::queue NodeQueue; - NodeQueue.push(&getRootContext()); - while (!NodeQueue.empty()) { - ContextTrieNode *Node = NodeQueue.front(); - NodeQueue.pop(); - + for (auto *Node : ContextTracker) { if (!Node->getFuncName().empty()) if (auto *Func = Binary->getBinaryFunction(Node->getFuncName())) ProfiledFunctions.insert(Func); - - for (auto &It : Node->getAllChildContext()) - NodeQueue.push(&It.second); } return true; } @@ -806,22 +798,13 @@ } void CSProfileGenerator::updateFunctionSamples() { - std::queue NodeQueue; - NodeQueue.push(&getRootContext()); - - while (!NodeQueue.empty()) { - ContextTrieNode *Node = NodeQueue.front(); - NodeQueue.pop(); - + for (auto *Node : ContextTracker) { FunctionSamples *FSamples = Node->getFunctionSamples(); if (FSamples) { if (UpdateTotalSamples) FSamples->updateTotalSamples(); FSamples->updateCallsiteSamples(); } - - for (auto &It : Node->getAllChildContext()) - NodeQueue.push(&It.second); } } @@ -999,17 +982,18 @@ // context profile merging/trimming. computeSummaryAndThreshold(); - convertToProfileMap(); - // Run global pre-inliner to adjust/merge context profile based on estimated // inline decisions. if (EnableCSPreInliner) { - CSPreInliner(ProfileMap, *Binary, Summary.get()).run(); + ContextTracker.populateFuncToCtxtMap(); + CSPreInliner(ContextTracker, *Binary, Summary.get()).run(); // Turn off the profile merger by default unless it is explicitly enabled. if (!CSProfMergeColdContext.getNumOccurrences()) CSProfMergeColdContext = false; } + convertToProfileMap(); + // Trim and merge cold context profile using cold threshold above. if (TrimColdProfile || CSProfMergeColdContext) { SampleContextTrimmer(ProfileMap) diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -161,7 +161,7 @@ // Get function size with a specific context. When there's no exact match // for the given context, try to retrieve the size of that function from // closest matching context. - uint32_t getFuncSizeForContext(const SampleContext &Context); + uint32_t getFuncSizeForContext(const ContextTrieNode *Context); // For inlinees that are full optimized away, we can establish zero size using // their remaining probes. @@ -485,8 +485,8 @@ return &I->second; } - uint32_t getFuncSizeForContext(SampleContext &Context) { - return FuncSizeTracker.getFuncSizeForContext(Context); + uint32_t getFuncSizeForContext(const ContextTrieNode *ContextNode) { + return FuncSizeTracker.getFuncSizeForContext(ContextNode); } // Load the symbols from debug table and populate into symbol list. diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -83,25 +83,23 @@ } uint32_t -BinarySizeContextTracker::getFuncSizeForContext(const SampleContext &Context) { +BinarySizeContextTracker::getFuncSizeForContext(const ContextTrieNode *Node) { ContextTrieNode *CurrNode = &RootContext; ContextTrieNode *PrevNode = nullptr; - SampleContextFrames Frames = Context.getContextFrames(); - int32_t I = Frames.size() - 1; + Optional Size; // Start from top-level context-less function, traverse down the reverse // context trie to find the best/longest match for given context, then // retrieve the size. - - while (CurrNode && I >= 0) { - // Process from leaf function to callers (added to context). - const auto &ChildFrame = Frames[I--]; + LineLocation CallSiteLoc(0, 0); + while (CurrNode && Node->getParentContext() != nullptr) { PrevNode = CurrNode; - CurrNode = - CurrNode->getChildContext(ChildFrame.Location, ChildFrame.FuncName); + CurrNode = CurrNode->getChildContext(CallSiteLoc, Node->getFuncName()); if (CurrNode && CurrNode->getFunctionSize()) Size = CurrNode->getFunctionSize().getValue(); + CallSiteLoc = Node->getCallSiteLoc(); + Node = Node->getParentContext(); } // If we traversed all nodes along the path of the context and haven't