diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -287,10 +287,16 @@ private: void generateLineNumBasedProfile(); - // Lookup or create FunctionSamples for the context - FunctionSamples & - getFunctionProfileForContext(const SampleContextFrameVector &Context, - bool WasLeafInlined = false); + + FunctionSamples *getOrCreateFunctionSamples(ContextTrieNode *ContextNode, + bool WasLeafInlined = false); + + // Lookup or create ContextTrieNode for the context, FunctionSamples is + // created inside this function. + ContextTrieNode * + getContextNodeForContext(const SampleContextFrameVector &Context, + bool WasLeafInlined = false); + // For profiled only functions, on-demand compute their inline context // function byte size which is used by the pre-inliner. void computeSizeForProfiledFunctions(); @@ -300,11 +306,18 @@ void populateBodySamplesForFunction(FunctionSamples &FunctionProfile, const RangeSample &RangeCounters); - void populateBoundarySamplesForFunction(SampleContextFrames ContextId, - FunctionSamples *CallerProfile, + + void populateBoundarySamplesForFunction(ContextTrieNode *CallerNode, const BranchSample &BranchCounters); + + void inferFunctionSamplesOnTrie(ContextTrieNode *Node); + void populateInferredFunctionSamples(); + void updateTotalSamplesOnTrie(ContextTrieNode *Node); + + void updateTotalSamples(); + void generateProbeBasedProfile(); // Fill in function body samples from probes @@ -313,14 +326,24 @@ // Fill in boundary samples for a call probe void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter, SampleContextFrames ContextStack); + + ContextTrieNode * + getContextNodeForLeafProbe(SampleContextFrames ContextStack, + const MCDecodedPseudoProbe *LeafProbe); + // Helper function to get FunctionSamples for the leaf probe FunctionSamples & getFunctionProfileForLeafProbe(SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe); + void buildProfileMapFromContextTrie(ContextTrieNode *Node, + SampleContextFrameVector &Context); + // Underlying context table serves for sample profile writer. std::unordered_set Contexts; + ContextTrieNode RootContext; + public: // Deduplicate adjacent repeated context sequences up to a given sequence // length. -1 means no size limit. diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -699,35 +699,39 @@ showDensitySuggestion(Density); } -FunctionSamples &CSProfileGenerator::getFunctionProfileForContext( - const SampleContextFrameVector &Context, bool WasLeafInlined) { - auto I = ProfileMap.find(SampleContext(Context)); - if (I == ProfileMap.end()) { - // Save the new context for future references. - SampleContextFrames NewContext = *Contexts.insert(Context).first; - SampleContext FContext(NewContext, RawContext); - auto Ret = ProfileMap.emplace(FContext, FunctionSamples()); - if (WasLeafInlined) - FContext.setAttribute(ContextWasInlined); - FunctionSamples &FProfile = Ret.first->second; - FProfile.setContext(FContext); - return Ret.first->second; - } else { - // Update ContextWasInlined attribute for existing contexts. - // The current function can be called in two ways: - // - when processing a probe of the current frame - // - when processing the entry probe of an inlinee's frame, which - // is then used to update the callsite count of the current frame. - // The two can happen in any order, hence here we are making sure - // `ContextWasInlined` is always set as expected. - // TODO: Note that the former does not always happen if no probes of the - // current frame has samples, and if the latter happens, we could lose the - // attribute. This should be fixed. - if (WasLeafInlined) - I->second.getContext().setAttribute(ContextWasInlined); +FunctionSamples * +CSProfileGenerator::getOrCreateFunctionSamples(ContextTrieNode *ContextNode, + bool WasLeafInlined) { + FunctionSamples *FProfile = ContextNode->getFunctionSamples(); + if (!FProfile) { + FProfile = new FunctionSamples(); + ContextNode->setFunctionSamples(FProfile); } + // Update ContextWasInlined attribute for existing contexts. + // The current function can be called in two ways: + // - when processing a probe of the current frame + // - when processing the entry probe of an inlinee's frame, which + // is then used to update the callsite count of the current frame. + // The two can happen in any order, hence here we are making sure + // `ContextWasInlined` is always set as expected. + // TODO: Note that the former does not always happen if no probes of the + // current frame has samples, and if the latter happens, we could lose the + // attribute. This should be fixed. + if (WasLeafInlined) + FProfile->getContext().setAttribute(ContextWasInlined); + return FProfile; +} - return I->second; +ContextTrieNode *CSProfileGenerator::getContextNodeForContext( + const SampleContextFrameVector &Context, bool WasLeafInlined) { + ContextTrieNode *ContextNode = &RootContext; + for (size_t I = 0; I < Context.size(); I++) { + const LineLocation &CallSite = Context[I].Location; + StringRef CalleeName = Context[I].FuncName; + ContextNode = ContextNode->getOrCreateChildContext(CallSite, CalleeName); + } + getOrCreateFunctionSamples(ContextNode, WasLeafInlined); + return ContextNode; } void CSProfileGenerator::generateProfile() { @@ -761,23 +765,39 @@ Binary->flushSymbolizer(); } +void CSProfileGenerator::updateTotalSamplesOnTrie(ContextTrieNode *Node) { + FunctionSamples *FunctionProfile = Node->getFunctionSamples(); + if (FunctionProfile) + FunctionProfile->updateTotalSamples(); + + for (auto &It : Node->getAllChildContext()) + updateTotalSamplesOnTrie(&It.second); +} + +void CSProfileGenerator::updateTotalSamples() { + if (!UpdateTotalSamples) + return; + + updateTotalSamplesOnTrie(&RootContext); +} + void CSProfileGenerator::generateLineNumBasedProfile() { for (const auto &CI : *SampleCounters) { const auto *CtxKey = cast(CI.first.getPtr()); - FunctionSamples *FunctionProfile = nullptr; + ContextTrieNode *ContextNode = &RootContext; // Sample context will be empty if the jump is an external-to-internal call // pattern, the head samples should be added for the internal function. if (!CtxKey->Context.empty()) { // Get or create function profile for the range - FunctionProfile = &getFunctionProfileForContext(CtxKey->Context, - CtxKey->WasLeafInlined); + ContextNode = + getContextNodeForContext(CtxKey->Context, CtxKey->WasLeafInlined); // Fill in function body samples - populateBodySamplesForFunction(*FunctionProfile, CI.second.RangeCounter); + populateBodySamplesForFunction(*ContextNode->getFunctionSamples(), + CI.second.RangeCounter); } // Fill in boundary sample counts as well as call site samples for calls - populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile, - CI.second.BranchCounter); + populateBoundarySamplesForFunction(ContextNode, CI.second.BranchCounter); } // Fill in call site value sample for inlined calls and also use context to // infer missing samples. Since we don't have call count for inlined @@ -823,8 +843,7 @@ } void CSProfileGenerator::populateBoundarySamplesForFunction( - SampleContextFrames ContextId, FunctionSamples *CallerProfile, - const BranchSample &BranchCounters) { + ContextTrieNode *Node, const BranchSample &BranchCounters) { for (const auto &Entry : BranchCounters) { uint64_t SourceOffset = Entry.first.first; @@ -836,88 +855,104 @@ if (CalleeName.size() == 0) continue; - SampleContextFrameVector CalleeCtx; - if (CallerProfile) { - assert(!ContextId.empty() && - "CallerProfile is null only if ContextId is empty"); + ContextTrieNode *CallerNode = Node; + if (CallerNode != &RootContext) { // Record called target sample and its count auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset); if (LeafLoc.hasValue()) { - CallerProfile->addCalledTargetSamples( + CallerNode->getFunctionSamples()->addCalledTargetSamples( LeafLoc->Location.LineOffset, getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName, Count); - // Record head sample for called target(callee) - CalleeCtx.append(ContextId.begin(), ContextId.end()); - assert(CalleeCtx.back().FuncName == LeafLoc->FuncName && - "Leaf function name doesn't match"); - CalleeCtx.back() = *LeafLoc; + CallerNode = CallerNode->getParentContext()->getOrCreateChildContext( + LeafLoc->Location, CallerNode->getFuncName()); } } - CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0)); - FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx); - CalleeProfile.addHeadSamples(Count); + + ContextTrieNode *CalleeNode = + CallerNode->getOrCreateChildContext(LineLocation(0, 0), CalleeName); + FunctionSamples *CalleeProfile = getOrCreateFunctionSamples(CalleeNode); + CalleeProfile->addHeadSamples(Count); } } -static SampleContextFrame -getCallerContext(SampleContextFrames CalleeContext, - SampleContextFrameVector &CallerContext) { - assert(CalleeContext.size() > 1 && "Unexpected empty context"); - CalleeContext = CalleeContext.drop_back(); - CallerContext.assign(CalleeContext.begin(), CalleeContext.end()); - SampleContextFrame CallerFrame = CallerContext.back(); - CallerContext.back().Location = LineLocation(0, 0); - return CallerFrame; +void CSProfileGenerator::inferFunctionSamplesOnTrie(ContextTrieNode *Node) { + // Child node should be processed before its parent, so traverse the tree in + // post-order + for (auto &It : Node->getAllChildContext()) + inferFunctionSamplesOnTrie(&It.second); + + FunctionSamples *CalleeProfile = Node->getFunctionSamples(); + if (!CalleeProfile) + return; + // If we already have head sample counts, we must have value profile + // for call sites added already. Skip to avoid double counting. + if (CalleeProfile->getHeadSamples()) + return; + ContextTrieNode *ParentNode = Node->getParentContext(); + // If we don't have context, nothing to do for caller's call site. + // This could happen for entry point function. + if (ParentNode == &RootContext) + return; + + ContextTrieNode *CallerNode = + ParentNode->getParentContext()->getOrCreateChildContext( + LineLocation(0, 0), ParentNode->getFuncName()); + // It's possible that we haven't seen any sample directly in the caller, + // in which case CallerProfile will not exist. But we can't modify + // ProfileMap while iterating it. + // TODO: created function profile for those callers too + if (!CallerNode->getFunctionSamples()) + return; + LineLocation CallerLeafFrameLoc = ParentNode->getCallSiteLoc(); + FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(CallerNode); + // Since we don't have call count for inlined functions, we + // estimate it from inlinee's profile using entry body sample. + uint64_t EstimatedCallCount = CalleeProfile->getEntrySamples(); + // If we don't have samples with location, use 1 to indicate live. + if (!EstimatedCallCount && !CalleeProfile->getBodySamples().size()) + EstimatedCallCount = 1; + CallerProfile.addCalledTargetSamples(CallerLeafFrameLoc.LineOffset, + CallerLeafFrameLoc.Discriminator, + Node->getFuncName(), EstimatedCallCount); + CallerProfile.addBodySamples(CallerLeafFrameLoc.LineOffset, + CallerLeafFrameLoc.Discriminator, + EstimatedCallCount); + CallerProfile.addTotalSamples(EstimatedCallCount); } void CSProfileGenerator::populateInferredFunctionSamples() { - for (const auto &Item : ProfileMap) { - const auto &CalleeContext = Item.first; - const FunctionSamples &CalleeProfile = Item.second; + inferFunctionSamplesOnTrie(&RootContext); +} - // If we already have head sample counts, we must have value profile - // for call sites added already. Skip to avoid double counting. - if (CalleeProfile.getHeadSamples()) - continue; - // If we don't have context, nothing to do for caller's call site. - // This could happen for entry point function. - if (CalleeContext.isBaseContext()) - continue; +void CSProfileGenerator::buildProfileMapFromContextTrie( + ContextTrieNode *Node, SampleContextFrameVector &Context) { + FunctionSamples *FProfile = Node->getFunctionSamples(); + if (FProfile) { + // Save the new context for future references. + SampleContextFrames NewContext = *Contexts.insert(Context).first; + SampleContext FContext(NewContext, RawContext); + // Copy all the old attributes + FContext.setAllAttributes(FProfile->getContext().getAllAttributes()); + auto Ret = ProfileMap.emplace(FContext, *FProfile); + FunctionSamples &NewProfile = Ret.first->second; + NewProfile.setContext(FContext); + delete (FProfile); + } - // Infer Caller's frame loc and context ID through string splitting - SampleContextFrameVector CallerContextId; - SampleContextFrame &&CallerLeafFrameLoc = - getCallerContext(CalleeContext.getContextFrames(), CallerContextId); - SampleContextFrames CallerContext(CallerContextId); - - // It's possible that we haven't seen any sample directly in the caller, - // in which case CallerProfile will not exist. But we can't modify - // ProfileMap while iterating it. - // TODO: created function profile for those callers too - if (ProfileMap.find(CallerContext) == ProfileMap.end()) - continue; - FunctionSamples &CallerProfile = ProfileMap[CallerContext]; - - // Since we don't have call count for inlined functions, we - // estimate it from inlinee's profile using entry body sample. - uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples(); - // If we don't have samples with location, use 1 to indicate live. - if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size()) - EstimatedCallCount = 1; - CallerProfile.addCalledTargetSamples( - CallerLeafFrameLoc.Location.LineOffset, - CallerLeafFrameLoc.Location.Discriminator, - CalleeProfile.getContext().getName(), EstimatedCallCount); - CallerProfile.addBodySamples(CallerLeafFrameLoc.Location.LineOffset, - CallerLeafFrameLoc.Location.Discriminator, - EstimatedCallCount); - CallerProfile.addTotalSamples(EstimatedCallCount); + for (auto &It : Node->getAllChildContext()) { + ContextTrieNode *ChildNode = &It.second; + Context.emplace_back(ChildNode->getFuncName(), ChildNode->getCallSiteLoc()); + buildProfileMapFromContextTrie(ChildNode, Context); + Context.pop_back(); } } void CSProfileGenerator::postProcessProfiles() { + SampleContextFrameVector Context; + buildProfileMapFromContextTrie(&RootContext, Context); + // Compute hot/cold threshold based on profile. This will be used for cold // context profile merging/trimming. computeSummaryAndThreshold(); @@ -1059,8 +1094,10 @@ // doesn't belong to current context, filter them out. if (!Probe->isBlock() || Count == 0) continue; - FunctionSamples &FunctionProfile = - getFunctionProfileForLeafProbe(ContextStack, Probe); + + ContextTrieNode *ContextNode = + getContextNodeForLeafProbe(ContextStack, Probe); + FunctionSamples &FunctionProfile = *ContextNode->getFunctionSamples(); // Record the current frame and FunctionProfile whenever samples are // collected for non-danglie probes. This is for reporting all of the // zero count probes of the frame later. @@ -1071,25 +1108,23 @@ FunctionProfile.addHeadSamples(Count); // Look up for the caller's function profile const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe); - SampleContextFrames CalleeContextId = - FunctionProfile.getContext().getContextFrames(); - if (InlinerDesc != nullptr && CalleeContextId.size() > 1) { + ContextTrieNode *CallerNode = ContextNode->getParentContext(); + if (InlinerDesc != nullptr && CallerNode != &RootContext) { // Since the context id will be compressed, we have to use callee's // context id to infer caller's context id to ensure they share the // same context prefix. - SampleContextFrameVector CallerContextId; - SampleContextFrame &&CallerLeafFrameLoc = - getCallerContext(CalleeContextId, CallerContextId); - uint64_t CallerIndex = CallerLeafFrameLoc.Location.LineOffset; + uint64_t CallerIndex = CallerNode->getCallSiteLoc().LineOffset; assert(CallerIndex && "Inferred caller's location index shouldn't be zero!"); + CallerNode = CallerNode->getParentContext()->getOrCreateChildContext( + LineLocation(0, 0), CallerNode->getFuncName()); FunctionSamples &CallerProfile = - getFunctionProfileForContext(CallerContextId); + *getOrCreateFunctionSamples(CallerNode); CallerProfile.setFunctionHash(InlinerDesc->FuncHash); CallerProfile.addBodySamples(CallerIndex, 0, Count); CallerProfile.addTotalSamples(Count); - CallerProfile.addCalledTargetSamples( - CallerIndex, 0, FunctionProfile.getContext().getName(), Count); + CallerProfile.addCalledTargetSamples(CallerIndex, 0, + ContextNode->getFuncName(), Count); } } } @@ -1129,7 +1164,7 @@ } } -FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe( +ContextTrieNode *CSProfileGenerator::getContextNodeForLeafProbe( SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) { // Explicitly copy the context for appending the leaf context @@ -1149,10 +1184,16 @@ const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid()); bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite(); - FunctionSamples &FunctionProile = - getFunctionProfileForContext(NewContextStack, WasLeafInlined); - FunctionProile.setFunctionHash(FuncDesc->FuncHash); - return FunctionProile; + ContextTrieNode *ContextNode = + getContextNodeForContext(NewContextStack, WasLeafInlined); + ContextNode->getFunctionSamples()->setFunctionHash(FuncDesc->FuncHash); + return ContextNode; +} + +FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe( + SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) { + return *getContextNodeForLeafProbe(ContextStack, LeafProbe) + ->getFunctionSamples(); } } // end namespace sampleprof