diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -39,6 +39,7 @@ class DILocation; class raw_ostream; +class ContextTrieNode; const std::error_category &sampleprof_category(); @@ -553,16 +554,6 @@ } } - // Promote context by removing top frames with the length of - // `ContextFramesToRemove`. Note that with array representation of context, - // the promotion is effectively a slice operation with first - // `ContextFramesToRemove` elements removed from left. - void promoteOnPath(uint32_t ContextFramesToRemove) { - assert(ContextFramesToRemove <= FullContext.size() && - "Cannot remove more than the whole context"); - FullContext = FullContext.drop_front(ContextFramesToRemove); - } - // Decode context string for a frame to get function name and location. // `ContextStr` is in the form of `FuncName:StartLine.Discriminator`. static void decodeContextString(StringRef ContextStr, StringRef &FName, diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h --- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h +++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h @@ -19,6 +19,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ProfileData/SampleProf.h" #include +#include #include namespace llvm { @@ -47,7 +48,6 @@ ContextTrieNode &moveToChildContext(const LineLocation &CallSite, ContextTrieNode &&NodeToMove, - uint32_t ContextFramesToRemove, bool DeleteNode = true); void removeChildContext(const LineLocation &CallSite, StringRef ChildName); std::map &getAllChildContext(); @@ -91,23 +91,15 @@ // calling context and the context is identified by path from root to the node. class SampleContextTracker { public: - struct ProfileComparer { - bool operator()(FunctionSamples *A, FunctionSamples *B) const { - // Sort function profiles by the number of total samples and their - // contexts. - if (A->getTotalSamples() == B->getTotalSamples()) - return A->getContext() < B->getContext(); - return A->getTotalSamples() > B->getTotalSamples(); - } - }; - // Keep profiles of a function sorted so that they will be processed/promoted // deterministically. - using ContextSamplesTy = std::set; + using ContextSamplesTy = SmallVector; SampleContextTracker() = default; SampleContextTracker(SampleProfileMap &Profiles, const DenseMap *GUIDToFuncNameMap); + // Populate the FuncToCtxtProfiles map after the trie is built. + void populateFuncToCtxtMap(); // Query context profile for a specific callee with given name at a given // call-site. The full context is identified by location of call instruction. FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst, @@ -131,6 +123,9 @@ bool MergeContext = true); // Query base profile for a given function by name. FunctionSamples *getBaseSamplesFor(StringRef Name, bool MergeContext = true); + // Query the context node of base profile for a given function by name. + ContextTrieNode *getBaseContextNodeFor(StringRef Name, + bool MergeContext = true); // Retrieve the context trie node for given profile context ContextTrieNode *getContextFor(const SampleContext &Context); // Get real function name for a given trie node. @@ -145,6 +140,16 @@ // Create a merged conext-less profile map. void createContextLessProfileMap(SampleProfileMap &ContextLessProfiles); + ContextTrieNode * + getContextNodeForFSamples(const FunctionSamples *FSamples) const { + auto I = FSamples2NodeMap.find(FSamples); + if (I == FSamples2NodeMap.end()) + return nullptr; + return I->second; + } + // Get a context string from root to current node. + std::string getContextString(const FunctionSamples &FSamples) const; + std::string getContextString(ContextTrieNode *Node) const; // Dump the internal context profile trie. void dump(); @@ -155,15 +160,23 @@ ContextTrieNode *getTopLevelContextNode(StringRef FName); ContextTrieNode &addTopLevelContextNode(StringRef FName); ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &NodeToPromo); - void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode, - uint32_t ContextFramesToRemove); + void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode); ContextTrieNode & promoteMergeContextSamplesTree(ContextTrieNode &FromNode, - ContextTrieNode &ToNodeParent, - uint32_t ContextFramesToRemove); + ContextTrieNode &ToNodeParent); + ContextTrieNode &moveToChildContext(ContextTrieNode &ToNodeParent, + const LineLocation &CallSite, + ContextTrieNode &&NodeToMove); + void setContextNode(const FunctionSamples *FSample, ContextTrieNode *Node) { + FSamples2NodeMap[FSample] = Node; + } // Map from function name to context profiles (excluding base profile) StringMap FuncToCtxtProfiles; + // Map from current FunctionSample to the belonged context trie. + std::unordered_map + FSamples2NodeMap; + // Map from function guid to real function names. Only used in md5 mode. const DenseMap *GUIDToFuncNameMap; diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp --- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -63,9 +63,10 @@ return ChildNodeRet; } -ContextTrieNode &ContextTrieNode::moveToChildContext( - const LineLocation &CallSite, ContextTrieNode &&NodeToMove, - uint32_t ContextFramesToRemove, bool DeleteNode) { +ContextTrieNode & +ContextTrieNode::moveToChildContext(const LineLocation &CallSite, + ContextTrieNode &&NodeToMove, + bool DeleteNode) { uint64_t Hash = FunctionSamples::getCallSiteHash(NodeToMove.getFuncName(), CallSite); assert(!AllChildContext.count(Hash) && "Node to remove must exist"); @@ -75,11 +76,25 @@ ContextTrieNode &NewNode = AllChildContext[Hash]; NewNode.CallSiteLoc = CallSite; + // Original context no longer needed, destroy if requested. + if (DeleteNode) + OldParentContext.removeChildContext(OldCallSite, NewNode.getFuncName()); + + return NewNode; +} + +ContextTrieNode & +SampleContextTracker::moveToChildContext(ContextTrieNode &ToNodeParent, + const LineLocation &CallSite, + ContextTrieNode &&NodeToMove) { + + ContextTrieNode &NewNode = + ToNodeParent.moveToChildContext(CallSite, std::move(NodeToMove), false); // Walk through nodes in the moved the subtree, and update // FunctionSamples' context as for the context promotion. // We also need to set new parant link for all children. std::queue NodeToUpdate; - NewNode.setParentContext(this); + NewNode.setParentContext(&ToNodeParent); NodeToUpdate.push(&NewNode); while (!NodeToUpdate.empty()) { @@ -88,10 +103,8 @@ FunctionSamples *FSamples = Node->getFunctionSamples(); if (FSamples) { - FSamples->getContext().promoteOnPath(ContextFramesToRemove); + setContextNode(FSamples, Node); FSamples->getContext().setState(SyntheticContext); - LLVM_DEBUG(dbgs() << " Context promoted to: " - << FSamples->getContext().toString() << "\n"); } for (auto &It : Node->getAllChildContext()) { @@ -101,10 +114,6 @@ } } - // Original context no longer needed, destroy if requested. - if (DeleteNode) - OldParentContext.removeChildContext(OldCallSite, NewNode.getFuncName()); - return NewNode; } @@ -203,13 +212,33 @@ SampleContext Context = FuncSample.first; LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context.toString() << "\n"); - if (!Context.isBaseContext()) - FuncToCtxtProfiles[Context.getName()].insert(FSamples); ContextTrieNode *NewNode = getOrCreateContextPath(Context, true); assert(!NewNode->getFunctionSamples() && "New node can't have sample profile"); NewNode->setFunctionSamples(FSamples); } + populateFuncToCtxtMap(); +} + +void SampleContextTracker::populateFuncToCtxtMap() { + std::queue NodeQueue; + NodeQueue.push(&RootContext); + + while (!NodeQueue.empty()) { + ContextTrieNode *Node = NodeQueue.front(); + NodeQueue.pop(); + + FunctionSamples *FSamples = Node->getFunctionSamples(); + if (FSamples) { + FSamples->getContext().setState(RawContext); + setContextNode(FSamples, Node); + if (Node->getParentContext() != &RootContext) + FuncToCtxtProfiles[Node->getFuncName()].push_back(FSamples); + } + + for (auto &It : Node->getAllChildContext()) + NodeQueue.push(&It.second); + } } FunctionSamples * @@ -232,7 +261,7 @@ if (CalleeContext) { FunctionSamples *FSamples = CalleeContext->getFunctionSamples(); LLVM_DEBUG(if (FSamples) { - dbgs() << " Callee context found: " << FSamples->getContext().toString() + dbgs() << " Callee context found: " << getContextString(CalleeContext) << "\n"; }); return FSamples; @@ -310,6 +339,15 @@ FunctionSamples *SampleContextTracker::getBaseSamplesFor(StringRef Name, bool MergeContext) { + ContextTrieNode *ContextNode = getBaseContextNodeFor(Name, MergeContext); + // Still no profile even after merge/promotion (if allowed) + if (!ContextNode) + return nullptr; + return ContextNode->getFunctionSamples(); +} + +ContextTrieNode * +SampleContextTracker::getBaseContextNodeFor(StringRef Name, bool MergeContext) { LLVM_DEBUG(dbgs() << "Getting base profile for function: " << Name << "\n"); // Convert real function names to MD5 names, if the input profile is // MD5-based. @@ -334,7 +372,7 @@ if (Context.hasState(InlinedContext) || Context.hasState(MergedContext)) continue; - ContextTrieNode *FromNode = getContextFor(Context); + ContextTrieNode *FromNode = getContextNodeForFSamples(CSamples); if (FromNode == Node) continue; @@ -343,19 +381,14 @@ Node = &ToNode; } } - - // Still no profile even after merge/promotion (if allowed) - if (!Node) - return nullptr; - - return Node->getFunctionSamples(); + return Node; } void SampleContextTracker::markContextSamplesInlined( const FunctionSamples *InlinedSamples) { assert(InlinedSamples && "Expect non-null inlined samples"); LLVM_DEBUG(dbgs() << "Marking context profile as inlined: " - << InlinedSamples->getContext().toString() << "\n"); + << getContextString(*InlinedSamples) << "\n"); InlinedSamples->getContext().setState(InlinedContext); } @@ -407,14 +440,36 @@ FunctionSamples *FromSamples = NodeToPromo.getFunctionSamples(); assert(FromSamples && "Shouldn't promote a context without profile"); LLVM_DEBUG(dbgs() << " Found context tree root to promote: " - << FromSamples->getContext().toString() << "\n"); + << getContextString(&NodeToPromo) << "\n"); assert(!FromSamples->getContext().hasState(InlinedContext) && "Shouldn't promote inlined context profile"); - uint32_t ContextFramesToRemove = - FromSamples->getContext().getContextFrames().size() - 1; - return promoteMergeContextSamplesTree(NodeToPromo, RootContext, - ContextFramesToRemove); + return promoteMergeContextSamplesTree(NodeToPromo, RootContext); +} + +std::string +SampleContextTracker::getContextString(const FunctionSamples &FSamples) const { + return getContextString(getContextNodeForFSamples(&FSamples)); +} + +std::string +SampleContextTracker::getContextString(ContextTrieNode *Node) const { + SampleContextFrameVector Res; + if (Node == &RootContext) + return std::string(); + Res.emplace_back(Node->getFuncName(), LineLocation(0, 0)); + + ContextTrieNode *PreNode = Node; + Node = Node->getParentContext(); + while (Node && Node != &RootContext) { + Res.emplace_back(Node->getFuncName(), PreNode->getCallSiteLoc()); + PreNode = Node; + Node = Node->getParentContext(); + } + + std::reverse(Res.begin(), Res.end()); + + return SampleContext::getContextString(Res); } void SampleContextTracker::dump() { RootContext.dumpTree(); } @@ -527,8 +582,7 @@ } void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode, - ContextTrieNode &ToNode, - uint32_t ContextFramesToRemove) { + ContextTrieNode &ToNode) { FunctionSamples *FromSamples = FromNode.getFunctionSamples(); FunctionSamples *ToSamples = ToNode.getFunctionSamples(); if (FromSamples && ToSamples) { @@ -541,16 +595,13 @@ } else if (FromSamples) { // Transfer FromSamples from FromNode to ToNode ToNode.setFunctionSamples(FromSamples); + setContextNode(FromSamples, &ToNode); FromSamples->getContext().setState(SyntheticContext); - FromSamples->getContext().promoteOnPath(ContextFramesToRemove); - FromNode.setFunctionSamples(nullptr); } } ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree( - ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent, - uint32_t ContextFramesToRemove) { - assert(ContextFramesToRemove && "Context to remove can't be empty"); + ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent) { // Ignore call site location if destination is top level under root LineLocation NewCallSiteLoc = LineLocation(0, 0); @@ -567,22 +618,25 @@ if (!ToNode) { // Do not delete node to move from its parent here because // caller is iterating over children of that parent node. - ToNode = &ToNodeParent.moveToChildContext( - NewCallSiteLoc, std::move(FromNode), ContextFramesToRemove, false); + ToNode = + &moveToChildContext(ToNodeParent, NewCallSiteLoc, std::move(FromNode)); + LLVM_DEBUG({ + dbgs() << " Context promoted and merged to: " << getContextString(ToNode) + << "\n"; + }); } else { // Destination node exists, merge samples for the context tree - mergeContextNode(FromNode, *ToNode, ContextFramesToRemove); + mergeContextNode(FromNode, *ToNode); LLVM_DEBUG({ if (ToNode->getFunctionSamples()) dbgs() << " Context promoted and merged to: " - << ToNode->getFunctionSamples()->getContext().toString() << "\n"; + << getContextString(ToNode) << "\n"; }); // Recursively promote and merge children for (auto &It : FromNode.getAllChildContext()) { ContextTrieNode &FromChildNode = It.second; - promoteMergeContextSamplesTree(FromChildNode, *ToNode, - ContextFramesToRemove); + promoteMergeContextSamplesTree(FromChildNode, *ToNode); } // Remove children once they're all merged diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1059,8 +1059,7 @@ return; } - ContextTrieNode *Caller = - ContextTracker->getContextFor(Samples->getContext()); + ContextTrieNode *Caller = ContextTracker->getContextNodeForFSamples(Samples); std::queue CalleeList; CalleeList.push(Caller); while (!CalleeList.empty()) { diff --git a/llvm/tools/llvm-profgen/CSPreInliner.h b/llvm/tools/llvm-profgen/CSPreInliner.h --- a/llvm/tools/llvm-profgen/CSPreInliner.h +++ b/llvm/tools/llvm-profgen/CSPreInliner.h @@ -67,7 +67,7 @@ // size by only keep context that is estimated to be inlined. class CSPreInliner { public: - CSPreInliner(SampleProfileMap &Profiles, ProfiledBinary &Binary, + CSPreInliner(SampleContextTracker &Tracker, ProfiledBinary &Binary, ProfileSummary *Summary); void run(); @@ -77,10 +77,9 @@ std::vector buildTopDownOrder(); void processFunction(StringRef Name); bool shouldInline(ProfiledInlineCandidate &Candidate); - uint32_t getFuncSize(const FunctionSamples &FSamples); + uint32_t getFuncSize(ContextTrieNode *ContextNode); bool UseContextCost; - SampleContextTracker ContextTracker; - SampleProfileMap &ProfileMap; + SampleContextTracker &ContextTracker; ProfiledBinary &Binary; ProfileSummary *Summary; }; diff --git a/llvm/tools/llvm-profgen/CSPreInliner.cpp b/llvm/tools/llvm-profgen/CSPreInliner.cpp --- a/llvm/tools/llvm-profgen/CSPreInliner.cpp +++ b/llvm/tools/llvm-profgen/CSPreInliner.cpp @@ -55,14 +55,14 @@ cl::desc( "Replay previous inlining and adjust context profile accordingly")); -CSPreInliner::CSPreInliner(SampleProfileMap &Profiles, ProfiledBinary &Binary, - ProfileSummary *Summary) +CSPreInliner::CSPreInliner(SampleContextTracker &Tracker, + ProfiledBinary &Binary, ProfileSummary *Summary) : UseContextCost(UseContextCostForPreInliner), // TODO: Pass in a guid-to-name map in order for // ContextTracker.getFuncNameFor to work, if `Profiles` can have md5 codes // as their profile context. - ContextTracker(Profiles, nullptr), ProfileMap(Profiles), Binary(Binary), - Summary(Summary) { + ContextTracker(Tracker), Binary(Binary), Summary(Summary) { + ContextTracker.populateFuncToCtxtMap(); // Set default preinliner hot/cold call site threshold tuned with CSSPGO. // for good performance with reasonable profile size. if (!SampleHotCallSiteThreshold.getNumOccurrences()) @@ -105,7 +105,7 @@ // current one in the trie is relavent. So we walk the trie instead of call // targets from function profile. ContextTrieNode *CallerNode = - ContextTracker.getContextFor(CallerSamples->getContext()); + ContextTracker.getContextNodeForFSamples(CallerSamples); bool HasNewCandidate = false; for (auto &Child : CallerNode->getAllChildContext()) { @@ -129,7 +129,7 @@ // TODO: call site and callee entry count should be mostly consistent, add // check for that. HasNewCandidate = true; - uint32_t CalleeSize = getFuncSize(*CalleeSamples); + uint32_t CalleeSize = getFuncSize(CalleeNode); CQueue.emplace(CalleeSamples, std::max(CallsiteCount, CalleeEntryCount), CalleeSize); } @@ -137,12 +137,11 @@ return HasNewCandidate; } -uint32_t CSPreInliner::getFuncSize(const FunctionSamples &FSamples) { - if (UseContextCost) { - return Binary.getFuncSizeForContext(FSamples.getContext()); - } +uint32_t CSPreInliner::getFuncSize(ContextTrieNode *ContextNode) { + if (UseContextCost) + return Binary.getFuncSizeForContext(ContextNode); - return FSamples.getBodySamples().size(); + return ContextNode->getFunctionSamples()->getBodySamples().size(); } bool CSPreInliner::shouldInline(ProfiledInlineCandidate &Candidate) { @@ -187,7 +186,8 @@ if (!FSamples) return; - unsigned FuncSize = getFuncSize(*FSamples); + unsigned FuncSize = + getFuncSize(ContextTracker.getContextNodeForFSamples(FSamples)); unsigned FuncFinalSize = FuncSize; unsigned SizeLimit = FuncSize * ProfileInlineGrowthLimit; SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax); @@ -216,11 +216,12 @@ } else { ++PreInlNumCSNotInlined; } - LLVM_DEBUG(dbgs() << (ShouldInline ? " Inlined" : " Outlined") - << " context profile for: " - << Candidate.CalleeSamples->getContext().toString() - << " (callee size: " << Candidate.SizeCost - << ", call count:" << Candidate.CallsiteCount << ")\n"); + LLVM_DEBUG( + dbgs() << (ShouldInline ? " Inlined" : " Outlined") + << " context profile for: " + << ContextTracker.getContextString(*Candidate.CalleeSamples) + << " (callee size: " << Candidate.SizeCost + << ", call count:" << Candidate.CallsiteCount << ")\n"); } if (!CQueue.empty()) { @@ -244,7 +245,8 @@ CQueue.pop(); bool WasInlined = Candidate.CalleeSamples->getContext().hasAttribute(ContextWasInlined); - dbgs() << " " << Candidate.CalleeSamples->getContext().toString() + dbgs() << " " + << ContextTracker.getContextString(*Candidate.CalleeSamples) << " (candidate size:" << Candidate.SizeCost << ", call count: " << Candidate.CallsiteCount << ", previously " << (WasInlined ? "inlined)\n" : "not inlined)\n"); @@ -254,19 +256,33 @@ void CSPreInliner::run() { #ifndef NDEBUG - auto printProfileNames = [](SampleProfileMap &Profiles, bool IsInput) { - dbgs() << (IsInput ? "Input" : "Output") << " context-sensitive profiles (" - << Profiles.size() << " total):\n"; - for (auto &It : Profiles) { - const FunctionSamples &Samples = It.second; - dbgs() << " [" << Samples.getContext().toString() << "] " - << Samples.getTotalSamples() << ":" << Samples.getHeadSamples() - << "\n"; + auto printProfileNames = [](SampleContextTracker &ContextTracker, + bool IsInput) { + std::queue NodeQueue; + NodeQueue.push(&ContextTracker.getRootContext()); + uint32_t Size = 0; + + while (!NodeQueue.empty()) { + ContextTrieNode *Node = NodeQueue.front(); + NodeQueue.pop(); + FunctionSamples *FSamples = Node->getFunctionSamples(); + + if (FSamples) { + Size++; + dbgs() << " [" << ContextTracker.getContextString(Node) << "] " + << FSamples->getTotalSamples() << ":" + << FSamples->getHeadSamples() << "\n"; + } + + for (auto &It : Node->getAllChildContext()) + NodeQueue.push(&It.second); } + dbgs() << (IsInput ? "Input" : "Output") << " context-sensitive profiles (" + << Size << " total):\n"; }; #endif - LLVM_DEBUG(printProfileNames(ProfileMap, true)); + LLVM_DEBUG(printProfileNames(ContextTracker, true)); // Execute global pre-inliner to estimate a global top-down inline // decision and merge profiles accordingly. This helps with profile @@ -281,24 +297,25 @@ // Not inlined context profiles are merged into its base, so we can // trim out such profiles from the output. - std::vector ProfilesToBeRemoved; - for (auto &It : ProfileMap) { - SampleContext &Context = It.second.getContext(); - if (!Context.isBaseContext() && !Context.hasState(InlinedContext)) { - assert(Context.hasState(MergedContext) && - "Not inlined context profile should be merged already"); - ProfilesToBeRemoved.push_back(It.first); + std::queue NodeQueue; + NodeQueue.push(&ContextTracker.getRootContext()); + + while (!NodeQueue.empty()) { + ContextTrieNode *Node = NodeQueue.front(); + NodeQueue.pop(); + FunctionSamples *FProfile = Node->getFunctionSamples(); + + if (FProfile && + (Node->getParentContext() != &ContextTracker.getRootContext() && + !FProfile->getContext().hasState(InlinedContext))) { + Node->setFunctionSamples(nullptr); } - } - for (auto &ContextName : ProfilesToBeRemoved) { - ProfileMap.erase(ContextName); + for (auto &It : Node->getAllChildContext()) + NodeQueue.push(&It.second); } - // Make sure ProfileMap's key is consistent with FunctionSamples' name. - SampleContextTrimmer(ProfileMap).canonicalizeContextProfiles(); - FunctionSamples::ProfileIsPreInlined = true; - LLVM_DEBUG(printProfileNames(ProfileMap, false)); + LLVM_DEBUG(printProfileNames(ContextTracker, false)); } diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -985,19 +985,19 @@ // context profile merging/trimming. computeSummaryAndThreshold(); - buildProfileMap(); - // TODO: free the FunctionSamples' memory that is created in profile - // generator. - // Run global pre-inliner to adjust/merge context profile based on estimated // inline decisions. if (EnableCSPreInliner) { - CSPreInliner(ProfileMap, *Binary, Summary.get()).run(); + CSPreInliner(ContextTracker, *Binary, Summary.get()).run(); // Turn off the profile merger by default unless it is explicitly enabled. if (!CSProfMergeColdContext.getNumOccurrences()) CSProfMergeColdContext = false; } + buildProfileMap(); + // TODO: free the FunctionSamples' memory that is created in profile + // generator. + // Trim and merge cold context profile using cold threshold above. if (TrimColdProfile || CSProfMergeColdContext) { SampleContextTrimmer(ProfileMap) diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -161,7 +161,7 @@ // Get function size with a specific context. When there's no exact match // for the given context, try to retrieve the size of that function from // closest matching context. - uint32_t getFuncSizeForContext(const SampleContext &Context); + uint32_t getFuncSizeForContext(const ContextTrieNode *Context); // For inlinees that are full optimized away, we can establish zero size using // their remaining probes. @@ -485,8 +485,8 @@ return &I->second; } - uint32_t getFuncSizeForContext(SampleContext &Context) { - return FuncSizeTracker.getFuncSizeForContext(Context); + uint32_t getFuncSizeForContext(const ContextTrieNode *ContextNode) { + return FuncSizeTracker.getFuncSizeForContext(ContextNode); } // Load the symbols from debug table and populate into symbol list. diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -85,25 +85,23 @@ } uint32_t -BinarySizeContextTracker::getFuncSizeForContext(const SampleContext &Context) { +BinarySizeContextTracker::getFuncSizeForContext(const ContextTrieNode *Node) { ContextTrieNode *CurrNode = &RootContext; ContextTrieNode *PrevNode = nullptr; - SampleContextFrames Frames = Context.getContextFrames(); - int32_t I = Frames.size() - 1; + Optional Size; // Start from top-level context-less function, traverse down the reverse // context trie to find the best/longest match for given context, then // retrieve the size. - - while (CurrNode && I >= 0) { - // Process from leaf function to callers (added to context). - const auto &ChildFrame = Frames[I--]; + LineLocation CallSiteLoc(0, 0); + while (CurrNode && Node->getParentContext() != nullptr) { PrevNode = CurrNode; - CurrNode = - CurrNode->getChildContext(ChildFrame.Location, ChildFrame.FuncName); + CurrNode = CurrNode->getChildContext(CallSiteLoc, Node->getFuncName()); if (CurrNode && CurrNode->getFunctionSize().hasValue()) Size = CurrNode->getFunctionSize().getValue(); + CallSiteLoc = Node->getCallSiteLoc(); + Node = Node->getParentContext(); } // If we traversed all nodes along the path of the context and haven't