diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -39,6 +39,7 @@ class DILocation; class raw_ostream; +class ContextTrieNode; const std::error_category &sampleprof_category(); @@ -594,6 +595,8 @@ bool hasState(ContextStateMask S) { return State & (uint32_t)S; } void setState(ContextStateMask S) { State |= (uint32_t)S; } void clearState(ContextStateMask S) { State &= (uint32_t)~S; } + void setContextNode(ContextTrieNode *Node) { ContextNode = Node; } + ContextTrieNode *getContextNode() const { return ContextNode; } bool hasContext() const { return State != UnknownContext; } bool isBaseContext() const { return FullContext.size() == 1; } StringRef getName() const { return Name; } @@ -692,6 +695,10 @@ StringRef Name; // Full context including calling context and leaf function name SampleContextFrames FullContext; + // Trie node in the context tracker that this context maps to (null until + // set). Each FunctionSamples should be associated with one context node, and + // the context is represented by the path from root to this node. + ContextTrieNode *ContextNode = nullptr; // State of the associated sample profile uint32_t State; // Attribute of the associated sample profile @@ -942,7 +949,7 @@ sampleprof_error Result = sampleprof_error::success; if (!GUIDToFuncNameMap) GUIDToFuncNameMap = Other.GUIDToFuncNameMap; - if (Context.getName().empty()) + if (Context.getName().empty() && Context.getContextNode() == nullptr) Context = Other.getContext(); if (FunctionHash == 0) { // Set the function hash code for the target profile. @@ -1119,6 +1126,10 @@ void setContext(const SampleContext &FContext) { Context = FContext; } + void setContextNode(ContextTrieNode *Node); + + void updateContextNode(ContextTrieNode *NewNode); + /// Whether the profile uses MD5 to represent string. 
static bool UseMD5; diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h --- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h +++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h @@ -19,6 +19,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ProfileData/SampleProf.h" #include +#include #include namespace llvm { @@ -47,7 +48,6 @@ ContextTrieNode &moveToChildContext(const LineLocation &CallSite, ContextTrieNode &&NodeToMove, - uint32_t ContextFramesToRemove, bool DeleteNode = true); void removeChildContext(const LineLocation &CallSite, StringRef ChildName); std::map &getAllChildContext(); @@ -59,6 +59,12 @@ LineLocation getCallSiteLoc() const; ContextTrieNode *getParentContext() const; void setParentContext(ContextTrieNode *Parent); + std::unordered_set &getFSamplesPointerSet() { + return FSamplesPointerSet; + } + void addFSamplesPointerSet(FunctionSamples *FSamples) { + FSamplesPointerSet.insert(FSamples); + } void dumpNode(); void dumpTree(); @@ -80,6 +86,13 @@ // Callsite location in parent context LineLocation CallSiteLoc; + + // A set of FunctionSamples associated with the current node, i.e. the + // FunctionSamples whose ContextNode field points to this node. It is used by + // the context tracker when promoting and merging nodes: a context node can + // be changed or merged, so we need to find which node each FunctionSamples + // points to. + std::unordered_set FSamplesPointerSet; }; // Profile tracker that manages profiles and its associated context. It @@ -91,23 +104,19 @@ // calling context and the context is identified by path from root to the node. class SampleContextTracker { public: - struct ProfileComparer { - bool operator()(FunctionSamples *A, FunctionSamples *B) const { - // Sort function profiles by the number of total samples and their - // contexts. 
- if (A->getTotalSamples() == B->getTotalSamples()) - return A->getContext() < B->getContext(); - return A->getTotalSamples() > B->getTotalSamples(); - } - }; - // Keep profiles of a function sorted so that they will be processed/promoted // deterministically. - using ContextSamplesTy = std::set; + using ContextSamplesTy = SmallVector; SampleContextTracker() = default; SampleContextTracker(SampleProfileMap &Profiles, const DenseMap *GUIDToFuncNameMap); + + static void setContextNode(FunctionSamples *FSamples, ContextTrieNode *Node); + static void updateContextNode(FunctionSamples *FSamples, + ContextTrieNode *NewNode); + // Populate the FuncToCtxtProfiles map after the trie is built. + void populateFuncToCtxtMap(); // Query context profile for a specific callee with given name at a given // call-site. The full context is identified by location of call instruction. FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst, @@ -131,6 +140,9 @@ bool MergeContext = true); // Query base profile for a given function by name. FunctionSamples *getBaseSamplesFor(StringRef Name, bool MergeContext = true); + // Query the context node of base profile for a given function by name. + ContextTrieNode *getBaseContextNodeFor(StringRef Name, + bool MergeContext = true); // Retrieve the context trie node for given profile context ContextTrieNode *getContextFor(const SampleContext &Context); // Get real function name for a given trie node. @@ -145,6 +157,9 @@ // Create a merged conext-less profile map. void createContextLessProfileMap(SampleProfileMap &ContextLessProfiles); + // Get a context string from root to current node. + std::string getContextString(const FunctionSamples &FSamples) const; + std::string getContextString(ContextTrieNode *Node) const; // Dump the internal context profile trie. 
void dump(); @@ -155,12 +170,10 @@ ContextTrieNode *getTopLevelContextNode(StringRef FName); ContextTrieNode &addTopLevelContextNode(StringRef FName); ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &NodeToPromo); - void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode, - uint32_t ContextFramesToRemove); + void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode); ContextTrieNode & promoteMergeContextSamplesTree(ContextTrieNode &FromNode, - ContextTrieNode &ToNodeParent, - uint32_t ContextFramesToRemove); + ContextTrieNode &ToNodeParent); // Map from function name to context profiles (excluding base profile) StringMap FuncToCtxtProfiles; diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp --- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -63,9 +63,10 @@ return ChildNodeRet; } -ContextTrieNode &ContextTrieNode::moveToChildContext( - const LineLocation &CallSite, ContextTrieNode &&NodeToMove, - uint32_t ContextFramesToRemove, bool DeleteNode) { +ContextTrieNode & +ContextTrieNode::moveToChildContext(const LineLocation &CallSite, + ContextTrieNode &&NodeToMove, + bool DeleteNode) { uint64_t Hash = FunctionSamples::getCallSiteHash(NodeToMove.getFuncName(), CallSite); assert(!AllChildContext.count(Hash) && "Node to remove must exist"); @@ -88,10 +89,8 @@ FunctionSamples *FSamples = Node->getFunctionSamples(); if (FSamples) { - FSamples->getContext().promoteOnPath(ContextFramesToRemove); + FSamples->updateContextNode(Node); FSamples->getContext().setState(SyntheticContext); - LLVM_DEBUG(dbgs() << " Context promoted to: " - << FSamples->getContext().toString() << "\n"); } for (auto &It : Node->getAllChildContext()) { @@ -203,13 +202,48 @@ SampleContext Context = FuncSample.first; LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context.toString() << "\n"); - if (!Context.isBaseContext()) - 
FuncToCtxtProfiles[Context.getName()].insert(FSamples); ContextTrieNode *NewNode = getOrCreateContextPath(Context, true); assert(!NewNode->getFunctionSamples() && "New node can't have sample profile"); NewNode->setFunctionSamples(FSamples); } + populateFuncToCtxtMap(); +} + +void FunctionSamples::setContextNode(ContextTrieNode *Node) { + Context.setContextNode(Node); + Node->addFSamplesPointerSet(this); +} + +void FunctionSamples::updateContextNode(ContextTrieNode *NewNode) { + ContextTrieNode *OldNode = Context.getContextNode(); + for (auto *OSamples : OldNode->getFSamplesPointerSet()) { + OSamples->getContext().setContextNode(NewNode); + NewNode->addFSamplesPointerSet(OSamples); + } + OldNode->getFSamplesPointerSet().clear(); + setContextNode(NewNode); +} + +void SampleContextTracker::populateFuncToCtxtMap() { + std::queue NodeQueue; + NodeQueue.push(&RootContext); + + while (!NodeQueue.empty()) { + ContextTrieNode *Node = NodeQueue.front(); + NodeQueue.pop(); + + FunctionSamples *FSamples = Node->getFunctionSamples(); + if (FSamples) { + FSamples->getContext().setState(RawContext); + FSamples->setContextNode(Node); + if (Node->getParentContext() != &RootContext) + FuncToCtxtProfiles[Node->getFuncName()].push_back(FSamples); + } + + for (auto &It : Node->getAllChildContext()) + NodeQueue.push(&It.second); + } } FunctionSamples * @@ -232,7 +266,7 @@ if (CalleeContext) { FunctionSamples *FSamples = CalleeContext->getFunctionSamples(); LLVM_DEBUG(if (FSamples) { - dbgs() << " Callee context found: " << FSamples->getContext().toString() + dbgs() << " Callee context found: " << getContextString(CalleeContext) << "\n"; }); return FSamples; @@ -310,6 +344,15 @@ FunctionSamples *SampleContextTracker::getBaseSamplesFor(StringRef Name, bool MergeContext) { + ContextTrieNode *ContextNode = getBaseContextNodeFor(Name, MergeContext); + // Still no profile even after merge/promotion (if allowed) + if (!ContextNode) + return nullptr; + return ContextNode->getFunctionSamples(); 
+} + +ContextTrieNode * +SampleContextTracker::getBaseContextNodeFor(StringRef Name, bool MergeContext) { LLVM_DEBUG(dbgs() << "Getting base profile for function: " << Name << "\n"); // Convert real function names to MD5 names, if the input profile is // MD5-based. @@ -329,33 +372,35 @@ // create synthetic base profile and merge context profiles // into base profile. for (auto *CSamples : FuncToCtxtProfiles[Name]) { - SampleContext &Context = CSamples->getContext(); - // Skip inlined context profile and also don't re-merge any context - if (Context.hasState(InlinedContext) || Context.hasState(MergedContext)) + SampleContext &OldContext = CSamples->getContext(); + // Avoid re-merging a context that has already been merged. + if (OldContext.hasState(MergedContext)) continue; - ContextTrieNode *FromNode = getContextFor(Context); + // Get the trie node that the old context currently points to. + ContextTrieNode *FromNode = OldContext.getContextNode(); if (FromNode == Node) continue; + SampleContext &ContextOnNode = + FromNode->getFunctionSamples()->getContext(); + // Skip inlined context profile. + if (ContextOnNode.hasState(InlinedContext)) + continue; + ContextTrieNode &ToNode = promoteMergeContextSamplesTree(*FromNode); assert((!Node || Node == &ToNode) && "Expect only one base profile"); Node = &ToNode; } } - - // Still no profile even after merge/promotion (if allowed) - if (!Node) - return nullptr; - - return Node->getFunctionSamples(); + return Node; } void SampleContextTracker::markContextSamplesInlined( const FunctionSamples *InlinedSamples) { assert(InlinedSamples && "Expect non-null inlined samples"); LLVM_DEBUG(dbgs() << "Marking context profile as inlined: " - << InlinedSamples->getContext().toString() << "\n"); + << getContextString(*InlinedSamples) << "\n"); InlinedSamples->getContext().setState(InlinedContext); } @@ -407,14 +452,36 @@ FunctionSamples *FromSamples = NodeToPromo.getFunctionSamples(); assert(FromSamples && "Shouldn't promote a context without profile"); LLVM_DEBUG(dbgs() 
<< " Found context tree root to promote: " - << FromSamples->getContext().toString() << "\n"); + << getContextString(&NodeToPromo) << "\n"); assert(!FromSamples->getContext().hasState(InlinedContext) && "Shouldn't promote inlined context profile"); - uint32_t ContextFramesToRemove = - FromSamples->getContext().getContextFrames().size() - 1; - return promoteMergeContextSamplesTree(NodeToPromo, RootContext, - ContextFramesToRemove); + return promoteMergeContextSamplesTree(NodeToPromo, RootContext); +} + +std::string +SampleContextTracker::getContextString(const FunctionSamples &FSamples) const { + return getContextString(FSamples.getContext().getContextNode()); +} + +std::string +SampleContextTracker::getContextString(ContextTrieNode *Node) const { + SampleContextFrameVector Res; + if (Node == &RootContext) + return std::string(); + Res.emplace_back(Node->getFuncName(), LineLocation(0, 0)); + + ContextTrieNode *PreNode = Node; + Node = Node->getParentContext(); + while (Node && Node != &RootContext) { + Res.emplace_back(Node->getFuncName(), PreNode->getCallSiteLoc()); + PreNode = Node; + Node = Node->getParentContext(); + } + + std::reverse(Res.begin(), Res.end()); + + return SampleContext::getContextString(Res); } void SampleContextTracker::dump() { RootContext.dumpTree(); } @@ -527,8 +594,7 @@ } void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode, - ContextTrieNode &ToNode, - uint32_t ContextFramesToRemove) { + ContextTrieNode &ToNode) { FunctionSamples *FromSamples = FromNode.getFunctionSamples(); FunctionSamples *ToSamples = ToNode.getFunctionSamples(); if (FromSamples && ToSamples) { @@ -538,19 +604,19 @@ FromSamples->getContext().setState(MergedContext); if (FromSamples->getContext().hasAttribute(ContextShouldBeInlined)) ToSamples->getContext().setAttribute(ContextShouldBeInlined); + FromSamples->updateContextNode(&ToNode); + FromNode.setFunctionSamples(nullptr); } else if (FromSamples) { // Transfer FromSamples from FromNode to ToNode 
ToNode.setFunctionSamples(FromSamples); FromSamples->getContext().setState(SyntheticContext); - FromSamples->getContext().promoteOnPath(ContextFramesToRemove); + FromSamples->updateContextNode(&ToNode); FromNode.setFunctionSamples(nullptr); } } ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree( - ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent, - uint32_t ContextFramesToRemove) { - assert(ContextFramesToRemove && "Context to remove can't be empty"); + ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent) { // Ignore call site location if destination is top level under root LineLocation NewCallSiteLoc = LineLocation(0, 0); @@ -567,22 +633,25 @@ if (!ToNode) { // Do not delete node to move from its parent here because // caller is iterating over children of that parent node. - ToNode = &ToNodeParent.moveToChildContext( - NewCallSiteLoc, std::move(FromNode), ContextFramesToRemove, false); + ToNode = &ToNodeParent.moveToChildContext(NewCallSiteLoc, + std::move(FromNode), false); + LLVM_DEBUG({ + dbgs() << " Context promoted and merged to: " << getContextString(ToNode) + << "\n"; + }); } else { // Destination node exists, merge samples for the context tree - mergeContextNode(FromNode, *ToNode, ContextFramesToRemove); + mergeContextNode(FromNode, *ToNode); LLVM_DEBUG({ if (ToNode->getFunctionSamples()) dbgs() << " Context promoted and merged to: " - << ToNode->getFunctionSamples()->getContext().toString() << "\n"; + << getContextString(ToNode) << "\n"; }); // Recursively promote and merge children for (auto &It : FromNode.getAllChildContext()) { ContextTrieNode &FromChildNode = It.second; - promoteMergeContextSamplesTree(FromChildNode, *ToNode, - ContextFramesToRemove); + promoteMergeContextSamplesTree(FromChildNode, *ToNode); } // Remove children once they're all merged diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ 
b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1059,8 +1059,7 @@ return; } - ContextTrieNode *Caller = - ContextTracker->getContextFor(Samples->getContext()); + ContextTrieNode *Caller = Samples->getContext().getContextNode(); std::queue CalleeList; CalleeList.push(Caller); while (!CalleeList.empty()) { diff --git a/llvm/tools/llvm-profgen/CSPreInliner.h b/llvm/tools/llvm-profgen/CSPreInliner.h --- a/llvm/tools/llvm-profgen/CSPreInliner.h +++ b/llvm/tools/llvm-profgen/CSPreInliner.h @@ -67,7 +67,7 @@ // size by only keep context that is estimated to be inlined. class CSPreInliner { public: - CSPreInliner(SampleProfileMap &Profiles, ProfiledBinary &Binary, + CSPreInliner(SampleContextTracker &Tracker, ProfiledBinary &Binary, ProfileSummary *Summary); void run(); @@ -77,10 +77,9 @@ std::vector buildTopDownOrder(); void processFunction(StringRef Name); bool shouldInline(ProfiledInlineCandidate &Candidate); - uint32_t getFuncSize(const FunctionSamples &FSamples); + uint32_t getFuncSize(ContextTrieNode *ContextNode); bool UseContextCost; - SampleContextTracker ContextTracker; - SampleProfileMap &ProfileMap; + SampleContextTracker &ContextTracker; ProfiledBinary &Binary; ProfileSummary *Summary; }; diff --git a/llvm/tools/llvm-profgen/CSPreInliner.cpp b/llvm/tools/llvm-profgen/CSPreInliner.cpp --- a/llvm/tools/llvm-profgen/CSPreInliner.cpp +++ b/llvm/tools/llvm-profgen/CSPreInliner.cpp @@ -55,14 +55,14 @@ cl::desc( "Replay previous inlining and adjust context profile accordingly")); -CSPreInliner::CSPreInliner(SampleProfileMap &Profiles, ProfiledBinary &Binary, - ProfileSummary *Summary) +CSPreInliner::CSPreInliner(SampleContextTracker &Tracker, + ProfiledBinary &Binary, ProfileSummary *Summary) : UseContextCost(UseContextCostForPreInliner), // TODO: Pass in a guid-to-name map in order for // ContextTracker.getFuncNameFor to work, if `Profiles` can have md5 codes // as their profile context. 
- ContextTracker(Profiles, nullptr), ProfileMap(Profiles), Binary(Binary), - Summary(Summary) { + ContextTracker(Tracker), Binary(Binary), Summary(Summary) { + ContextTracker.populateFuncToCtxtMap(); // Set default preinliner hot/cold call site threshold tuned with CSSPGO. // for good performance with reasonable profile size. if (!SampleHotCallSiteThreshold.getNumOccurrences()) @@ -104,8 +104,7 @@ // context profile is concerned, only those frames that are children of // current one in the trie is relavent. So we walk the trie instead of call // targets from function profile. - ContextTrieNode *CallerNode = - ContextTracker.getContextFor(CallerSamples->getContext()); + ContextTrieNode *CallerNode = CallerSamples->getContext().getContextNode(); bool HasNewCandidate = false; for (auto &Child : CallerNode->getAllChildContext()) { @@ -129,7 +128,7 @@ // TODO: call site and callee entry count should be mostly consistent, add // check for that. HasNewCandidate = true; - uint32_t CalleeSize = getFuncSize(*CalleeSamples); + uint32_t CalleeSize = getFuncSize(CalleeNode); CQueue.emplace(CalleeSamples, std::max(CallsiteCount, CalleeEntryCount), CalleeSize); } @@ -137,12 +136,11 @@ return HasNewCandidate; } -uint32_t CSPreInliner::getFuncSize(const FunctionSamples &FSamples) { - if (UseContextCost) { - return Binary.getFuncSizeForContext(FSamples.getContext()); - } +uint32_t CSPreInliner::getFuncSize(ContextTrieNode *ContextNode) { + if (UseContextCost) + return Binary.getFuncSizeForContext(ContextNode); - return FSamples.getBodySamples().size(); + return ContextNode->getFunctionSamples()->getBodySamples().size(); } bool CSPreInliner::shouldInline(ProfiledInlineCandidate &Candidate) { @@ -187,7 +185,7 @@ if (!FSamples) return; - unsigned FuncSize = getFuncSize(*FSamples); + unsigned FuncSize = getFuncSize(FSamples->getContext().getContextNode()); unsigned FuncFinalSize = FuncSize; unsigned SizeLimit = FuncSize * ProfileInlineGrowthLimit; SizeLimit = std::min(SizeLimit, 
(unsigned)ProfileInlineLimitMax); @@ -216,11 +214,12 @@ } else { ++PreInlNumCSNotInlined; } - LLVM_DEBUG(dbgs() << (ShouldInline ? " Inlined" : " Outlined") - << " context profile for: " - << Candidate.CalleeSamples->getContext().toString() - << " (callee size: " << Candidate.SizeCost - << ", call count:" << Candidate.CallsiteCount << ")\n"); + LLVM_DEBUG( + dbgs() << (ShouldInline ? " Inlined" : " Outlined") + << " context profile for: " + << ContextTracker.getContextString(*Candidate.CalleeSamples) + << " (callee size: " << Candidate.SizeCost + << ", call count:" << Candidate.CallsiteCount << ")\n"); } if (!CQueue.empty()) { @@ -244,7 +243,8 @@ CQueue.pop(); bool WasInlined = Candidate.CalleeSamples->getContext().hasAttribute(ContextWasInlined); - dbgs() << " " << Candidate.CalleeSamples->getContext().toString() + dbgs() << " " + << ContextTracker.getContextString(*Candidate.CalleeSamples) << " (candidate size:" << Candidate.SizeCost << ", call count: " << Candidate.CallsiteCount << ", previously " << (WasInlined ? "inlined)\n" : "not inlined)\n"); @@ -254,19 +254,33 @@ void CSPreInliner::run() { #ifndef NDEBUG - auto printProfileNames = [](SampleProfileMap &Profiles, bool IsInput) { - dbgs() << (IsInput ? 
"Input" : "Output") << " context-sensitive profiles (" - << Profiles.size() << " total):\n"; - for (auto &It : Profiles) { - const FunctionSamples &Samples = It.second; - dbgs() << " [" << Samples.getContext().toString() << "] " - << Samples.getTotalSamples() << ":" << Samples.getHeadSamples() - << "\n"; + auto printProfileNames = [](SampleContextTracker &ContextTracker, + bool IsInput) { + std::queue NodeQueue; + NodeQueue.push(&ContextTracker.getRootContext()); + uint32_t Size = 0; + + while (!NodeQueue.empty()) { + ContextTrieNode *Node = NodeQueue.front(); + NodeQueue.pop(); + FunctionSamples *FSamples = Node->getFunctionSamples(); + + if (FSamples) { + Size++; + dbgs() << " [" << ContextTracker.getContextString(Node) << "] " + << FSamples->getTotalSamples() << ":" + << FSamples->getHeadSamples() << "\n"; + } + + for (auto &It : Node->getAllChildContext()) + NodeQueue.push(&It.second); } + dbgs() << (IsInput ? "Input" : "Output") << " context-sensitive profiles (" + << Size << " total):\n"; }; #endif - LLVM_DEBUG(printProfileNames(ProfileMap, true)); + LLVM_DEBUG(printProfileNames(ContextTracker, true)); // Execute global pre-inliner to estimate a global top-down inline // decision and merge profiles accordingly. This helps with profile @@ -281,24 +295,25 @@ // Not inlined context profiles are merged into its base, so we can // trim out such profiles from the output. 
- std::vector ProfilesToBeRemoved; - for (auto &It : ProfileMap) { - SampleContext &Context = It.second.getContext(); - if (!Context.isBaseContext() && !Context.hasState(InlinedContext)) { - assert(Context.hasState(MergedContext) && - "Not inlined context profile should be merged already"); - ProfilesToBeRemoved.push_back(It.first); + std::queue NodeQueue; + NodeQueue.push(&ContextTracker.getRootContext()); + + while (!NodeQueue.empty()) { + ContextTrieNode *Node = NodeQueue.front(); + NodeQueue.pop(); + FunctionSamples *FProfile = Node->getFunctionSamples(); + + if (FProfile && + (Node->getParentContext() != &ContextTracker.getRootContext() && + !FProfile->getContext().hasState(InlinedContext))) { + Node->setFunctionSamples(nullptr); } - } - for (auto &ContextName : ProfilesToBeRemoved) { - ProfileMap.erase(ContextName); + for (auto &It : Node->getAllChildContext()) + NodeQueue.push(&It.second); } - // Make sure ProfileMap's key is consistent with FunctionSamples' name. - SampleContextTrimmer(ProfileMap).canonicalizeContextProfiles(); - FunctionSamples::ProfileIsPreInlined = true; - LLVM_DEBUG(printProfileNames(ProfileMap, false)); + LLVM_DEBUG(printProfileNames(ContextTracker, false)); } diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -994,19 +994,19 @@ // context profile merging/trimming. computeSummaryAndThreshold(); - buildProfileMap(); - // TODO: free the FunctionSamples' memory that is created in profile - // generator. - // Run global pre-inliner to adjust/merge context profile based on estimated // inline decisions. if (EnableCSPreInliner) { - CSPreInliner(ProfileMap, *Binary, Summary.get()).run(); + CSPreInliner(ContextTracker, *Binary, Summary.get()).run(); // Turn off the profile merger by default unless it is explicitly enabled. 
if (!CSProfMergeColdContext.getNumOccurrences()) CSProfMergeColdContext = false; } + buildProfileMap(); + // TODO: free the FunctionSamples' memory that is created in profile + // generator. + // Trim and merge cold context profile using cold threshold above. if (TrimColdProfile || CSProfMergeColdContext) { SampleContextTrimmer(ProfileMap) diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -161,7 +161,7 @@ // Get function size with a specific context. When there's no exact match // for the given context, try to retrieve the size of that function from // closest matching context. - uint32_t getFuncSizeForContext(const SampleContext &Context); + uint32_t getFuncSizeForContext(ContextTrieNode *Context); // For inlinees that are full optimized away, we can establish zero size using // their remaining probes. @@ -485,8 +485,8 @@ return &I->second; } - uint32_t getFuncSizeForContext(SampleContext &Context) { - return FuncSizeTracker.getFuncSizeForContext(Context); + uint32_t getFuncSizeForContext(ContextTrieNode *ContextNode) { + return FuncSizeTracker.getFuncSizeForContext(ContextNode); } // Load the symbols from debug table and populate into symbol list. 
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -85,25 +85,23 @@ } uint32_t -BinarySizeContextTracker::getFuncSizeForContext(const SampleContext &Context) { +BinarySizeContextTracker::getFuncSizeForContext(ContextTrieNode *Node) { ContextTrieNode *CurrNode = &RootContext; ContextTrieNode *PrevNode = nullptr; - SampleContextFrames Frames = Context.getContextFrames(); - int32_t I = Frames.size() - 1; + Optional Size; // Start from top-level context-less function, traverse down the reverse // context trie to find the best/longest match for given context, then // retrieve the size. - - while (CurrNode && I >= 0) { - // Process from leaf function to callers (added to context). - const auto &ChildFrame = Frames[I--]; + LineLocation CallSiteLoc(0, 0); + while (CurrNode && Node->getParentContext() != nullptr) { PrevNode = CurrNode; - CurrNode = - CurrNode->getChildContext(ChildFrame.Location, ChildFrame.FuncName); + CurrNode = CurrNode->getChildContext(CallSiteLoc, Node->getFuncName()); if (CurrNode && CurrNode->getFunctionSize().hasValue()) Size = CurrNode->getFunctionSize().getValue(); + CallSiteLoc = Node->getCallSiteLoc(); + Node = Node->getParentContext(); } // If we traversed all nodes along the path of the context and haven't