diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -294,10 +294,16 @@ private: void generateLineNumBasedProfile(); - // Lookup or create FunctionSamples for the context - FunctionSamples & - getFunctionProfileForContext(const SampleContextFrameVector &Context, - bool WasLeafInlined = false); + + FunctionSamples *getOrCreateFunctionSamples(ContextTrieNode *ContextNode, + bool WasLeafInlined = false); + + // Lookup or create ContextTrieNode for the context, FunctionSamples is + // created inside this function. + ContextTrieNode * + getOrCreateContextNodeForContext(const SampleContextFrames Context, + bool WasLeafInlined = false); + // For profiled only functions, on-demand compute their inline context // function byte size which is used by the pre-inliner. void computeSizeForProfiledFunctions(); @@ -307,11 +313,18 @@ void populateBodySamplesForFunction(FunctionSamples &FunctionProfile, const RangeSample &RangeCounters); - void populateBoundarySamplesForFunction(SampleContextFrames ContextId, - FunctionSamples *CallerProfile, + + void populateBoundarySamplesForFunction(ContextTrieNode *CallerNode, const BranchSample &BranchCounters); + + void inferFunctionSamplesOnTrie(ContextTrieNode &Node); + void populateInferredFunctionSamples(); + void updateTotalSamplesOnTrie(ContextTrieNode &Node); + + void updateTotalSamples(); + void generateProbeBasedProfile(); // Fill in function body samples from probes @@ -320,14 +333,26 @@ // Fill in boundary samples for a call probe void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter, SampleContextFrames ContextStack); + + ContextTrieNode * + getContextNodeForLeafProbe(SampleContextFrames ContextStack, + const MCDecodedPseudoProbe *LeafProbe); + // Helper function to get FunctionSamples for the leaf probe FunctionSamples & getFunctionProfileForLeafProbe(SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe); + void buildProfileMap(ContextTrieNode &Node, + SampleContextFrameVector &Context); + + void buildProfileMap(); + // Underlying context table serves for sample profile writer. std::unordered_set Contexts; + ContextTrieNode RootContext; + public: // Deduplicate adjacent repeated context sequences up to a given sequence // length. -1 means no size limit. diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -710,35 +710,39 @@ showDensitySuggestion(Density); } -FunctionSamples &CSProfileGenerator::getFunctionProfileForContext( - const SampleContextFrameVector &Context, bool WasLeafInlined) { - auto I = ProfileMap.find(SampleContext(Context)); - if (I == ProfileMap.end()) { - // Save the new context for future references. - SampleContextFrames NewContext = *Contexts.insert(Context).first; - SampleContext FContext(NewContext, RawContext); - auto Ret = ProfileMap.emplace(FContext, FunctionSamples()); - if (WasLeafInlined) - FContext.setAttribute(ContextWasInlined); - FunctionSamples &FProfile = Ret.first->second; - FProfile.setContext(FContext); - return Ret.first->second; - } else { - // Update ContextWasInlined attribute for existing contexts. - // The current function can be called in two ways: - // - when processing a probe of the current frame - // - when processing the entry probe of an inlinee's frame, which - // is then used to update the callsite count of the current frame. - // The two can happen in any order, hence here we are making sure - // `ContextWasInlined` is always set as expected. - // TODO: Note that the former does not always happen if no probes of the - // current frame has samples, and if the latter happens, we could lose the - // attribute. This should be fixed. - if (WasLeafInlined) - I->second.getContext().setAttribute(ContextWasInlined); +FunctionSamples * +CSProfileGenerator::getOrCreateFunctionSamples(ContextTrieNode *ContextNode, + bool WasLeafInlined) { + FunctionSamples *FProfile = ContextNode->getFunctionSamples(); + if (!FProfile) { + FProfile = new FunctionSamples(); + ContextNode->setFunctionSamples(FProfile); } + // Update ContextWasInlined attribute for existing contexts. + // The current function can be called in two ways: + // - when processing a probe of the current frame + // - when processing the entry probe of an inlinee's frame, which + // is then used to update the callsite count of the current frame. + // The two can happen in any order, hence here we are making sure + // `ContextWasInlined` is always set as expected. + // TODO: Note that the former does not always happen if no probes of the + // current frame has samples, and if the latter happens, we could lose the + // attribute. This should be fixed. + if (WasLeafInlined) + FProfile->getContext().setAttribute(ContextWasInlined); + return FProfile; +} - return I->second; +ContextTrieNode *CSProfileGenerator::getOrCreateContextNodeForContext( + const SampleContextFrames Context, bool WasLeafInlined) { + ContextTrieNode *ContextNode = &RootContext; + for (size_t I = 0; I < Context.size(); I++) { + const LineLocation &CallSite = Context[I].Location; + StringRef CalleeName = Context[I].FuncName; + ContextNode = ContextNode->getOrCreateChildContext(CallSite, CalleeName); + } + getOrCreateFunctionSamples(ContextNode, WasLeafInlined); + return ContextNode; } void CSProfileGenerator::generateProfile() { @@ -772,23 +776,39 @@ Binary->flushSymbolizer(); } +void CSProfileGenerator::updateTotalSamplesOnTrie(ContextTrieNode &Node) { + FunctionSamples *FunctionProfile = Node.getFunctionSamples(); + if (FunctionProfile) + FunctionProfile->updateTotalSamples(); + + for (auto &It : Node.getAllChildContext()) + updateTotalSamplesOnTrie(It.second); +} + +void CSProfileGenerator::updateTotalSamples() { + if (!UpdateTotalSamples) + return; + + updateTotalSamplesOnTrie(RootContext); +} + void CSProfileGenerator::generateLineNumBasedProfile() { for (const auto &CI : *SampleCounters) { const auto *CtxKey = cast(CI.first.getPtr()); - FunctionSamples *FunctionProfile = nullptr; + ContextTrieNode *ContextNode = &RootContext; // Sample context will be empty if the jump is an external-to-internal call // pattern, the head samples should be added for the internal function. if (!CtxKey->Context.empty()) { // Get or create function profile for the range - FunctionProfile = &getFunctionProfileForContext(CtxKey->Context, - CtxKey->WasLeafInlined); + ContextNode = getOrCreateContextNodeForContext(CtxKey->Context, + CtxKey->WasLeafInlined); // Fill in function body samples - populateBodySamplesForFunction(*FunctionProfile, CI.second.RangeCounter); + populateBodySamplesForFunction(*ContextNode->getFunctionSamples(), + CI.second.RangeCounter); } // Fill in boundary sample counts as well as call site samples for calls - populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile, - CI.second.BranchCounter); + populateBoundarySamplesForFunction(ContextNode, CI.second.BranchCounter); } // Fill in call site value sample for inlined calls and also use context to // infer missing samples. Since we don't have call count for inlined @@ -834,8 +854,7 @@ } void CSProfileGenerator::populateBoundarySamplesForFunction( - SampleContextFrames ContextId, FunctionSamples *CallerProfile, - const BranchSample &BranchCounters) { + ContextTrieNode *Node, const BranchSample &BranchCounters) { for (const auto &Entry : BranchCounters) { uint64_t SourceOffset = Entry.first.first; @@ -847,88 +866,104 @@ if (CalleeName.size() == 0) continue; - SampleContextFrameVector CalleeCtx; - if (CallerProfile) { - assert(!ContextId.empty() && - "CallerProfile is null only if ContextId is empty"); + ContextTrieNode *CallerNode = Node; + if (CallerNode != &RootContext) { // Record called target sample and its count auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset); if (LeafLoc.hasValue()) { - CallerProfile->addCalledTargetSamples( + CallerNode->getFunctionSamples()->addCalledTargetSamples( LeafLoc->Location.LineOffset, getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName, Count); - // Record head sample for called target(callee) - CalleeCtx.append(ContextId.begin(), ContextId.end()); - assert(CalleeCtx.back().FuncName == LeafLoc->FuncName && - "Leaf function name doesn't match"); - CalleeCtx.back() = *LeafLoc; + CallerNode = CallerNode->getParentContext()->getOrCreateChildContext( + LeafLoc->Location, CallerNode->getFuncName()); } } - CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0)); - FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx); - CalleeProfile.addHeadSamples(Count); + + ContextTrieNode *CalleeNode = + CallerNode->getOrCreateChildContext(LineLocation(0, 0), CalleeName); + FunctionSamples *CalleeProfile = getOrCreateFunctionSamples(CalleeNode); + CalleeProfile->addHeadSamples(Count); } } -static SampleContextFrame -getCallerContext(SampleContextFrames CalleeContext, - SampleContextFrameVector &CallerContext) { - assert(CalleeContext.size() > 1 && "Unexpected empty context"); - CalleeContext = CalleeContext.drop_back(); - CallerContext.assign(CalleeContext.begin(), CalleeContext.end()); - SampleContextFrame CallerFrame = CallerContext.back(); - CallerContext.back().Location = LineLocation(0, 0); - return CallerFrame; +void CSProfileGenerator::inferFunctionSamplesOnTrie(ContextTrieNode &Node) { + // There is no call jmp sample between the inliner and inlinee, we need to use + // the inlinee's context to infer inliner's context, i.e. parent(inliner)'s + // sample depends on child(inlinee)'s sample, so traverse the tree in + // post-order. + for (auto &It : Node.getAllChildContext()) + inferFunctionSamplesOnTrie(It.second); + + FunctionSamples *CalleeProfile = Node.getFunctionSamples(); + if (!CalleeProfile) + return; + // If we already have head sample counts, we must have value profile + // for call sites added already. Skip to avoid double counting. + if (CalleeProfile->getHeadSamples()) + return; + ContextTrieNode *ParentNode = Node.getParentContext(); + // If we don't have context, nothing to do for caller's call site. + // This could happen for entry point function. + if (ParentNode == &RootContext) + return; + + ContextTrieNode *CallerNode = + ParentNode->getParentContext()->getOrCreateChildContext( + LineLocation(0, 0), ParentNode->getFuncName()); + LineLocation CallerLeafFrameLoc = ParentNode->getCallSiteLoc(); + FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(CallerNode); + // Since we don't have call count for inlined functions, we + // estimate it from inlinee's profile using entry body sample. + uint64_t EstimatedCallCount = CalleeProfile->getEntrySamples(); + // If we don't have samples with location, use 1 to indicate live. + if (!EstimatedCallCount && !CalleeProfile->getBodySamples().size()) + EstimatedCallCount = 1; + CallerProfile.addCalledTargetSamples(CallerLeafFrameLoc.LineOffset, + CallerLeafFrameLoc.Discriminator, + Node.getFuncName(), EstimatedCallCount); + CallerProfile.addBodySamples(CallerLeafFrameLoc.LineOffset, + CallerLeafFrameLoc.Discriminator, + EstimatedCallCount); + CallerProfile.addTotalSamples(EstimatedCallCount); } void CSProfileGenerator::populateInferredFunctionSamples() { - for (const auto &Item : ProfileMap) { - const auto &CalleeContext = Item.first; - const FunctionSamples &CalleeProfile = Item.second; + inferFunctionSamplesOnTrie(RootContext); +} - // If we already have head sample counts, we must have value profile - // for call sites added already. Skip to avoid double counting. - if (CalleeProfile.getHeadSamples()) - continue; - // If we don't have context, nothing to do for caller's call site. - // This could happen for entry point function. - if (CalleeContext.isBaseContext()) - continue; +void CSProfileGenerator::buildProfileMap(ContextTrieNode &Node, + SampleContextFrameVector &Context) { + FunctionSamples *FProfile = Node.getFunctionSamples(); + if (FProfile) { + // Save the new context for future references. + SampleContextFrames NewContext = *Contexts.insert(Context).first; + SampleContext FContext(NewContext, RawContext); + // Copy all the old attributes + FContext.setAllAttributes(FProfile->getContext().getAllAttributes()); + auto Ret = ProfileMap.emplace(FContext, *FProfile); + FunctionSamples &NewProfile = Ret.first->second; + NewProfile.setContext(FContext); + delete (FProfile); + } - // Infer Caller's frame loc and context ID through string splitting - SampleContextFrameVector CallerContextId; - SampleContextFrame &&CallerLeafFrameLoc = - getCallerContext(CalleeContext.getContextFrames(), CallerContextId); - SampleContextFrames CallerContext(CallerContextId); - - // It's possible that we haven't seen any sample directly in the caller, - // in which case CallerProfile will not exist. But we can't modify - // ProfileMap while iterating it. - // TODO: created function profile for those callers too - if (ProfileMap.find(CallerContext) == ProfileMap.end()) - continue; - FunctionSamples &CallerProfile = ProfileMap[CallerContext]; - - // Since we don't have call count for inlined functions, we - // estimate it from inlinee's profile using entry body sample. - uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples(); - // If we don't have samples with location, use 1 to indicate live. - if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size()) - EstimatedCallCount = 1; - CallerProfile.addCalledTargetSamples( - CallerLeafFrameLoc.Location.LineOffset, - CallerLeafFrameLoc.Location.Discriminator, - CalleeProfile.getContext().getName(), EstimatedCallCount); - CallerProfile.addBodySamples(CallerLeafFrameLoc.Location.LineOffset, - CallerLeafFrameLoc.Location.Discriminator, - EstimatedCallCount); - CallerProfile.addTotalSamples(EstimatedCallCount); + for (auto &It : Node.getAllChildContext()) { + ContextTrieNode &ChildNode = It.second; + Context.emplace_back(ChildNode.getFuncName(), ChildNode.getCallSiteLoc()); + buildProfileMap(ChildNode, Context); + Context.pop_back(); } } +void CSProfileGenerator::buildProfileMap() { + SampleContextFrameVector Context; + buildProfileMap(RootContext, Context); +} + void CSProfileGenerator::postProcessProfiles() { + buildProfileMap(); + // Compute hot/cold threshold based on profile. This will be used for cold // context profile merging/trimming. computeSummaryAndThreshold(); @@ -1069,8 +1104,10 @@ // doesn't belong to current context, filter them out. if (!Probe->isBlock() || Count == 0) continue; - FunctionSamples &FunctionProfile = - getFunctionProfileForLeafProbe(ContextStack, Probe); + + ContextTrieNode *ContextNode = + getContextNodeForLeafProbe(ContextStack, Probe); + FunctionSamples &FunctionProfile = *ContextNode->getFunctionSamples(); // Record the current frame and FunctionProfile whenever samples are // collected for non-danglie probes. This is for reporting all of the // zero count probes of the frame later. @@ -1081,25 +1118,23 @@ FunctionProfile.addHeadSamples(Count); // Look up for the caller's function profile const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe); - SampleContextFrames CalleeContextId = - FunctionProfile.getContext().getContextFrames(); - if (InlinerDesc != nullptr && CalleeContextId.size() > 1) { + ContextTrieNode *CallerNode = ContextNode->getParentContext(); + if (InlinerDesc != nullptr && CallerNode != &RootContext) { // Since the context id will be compressed, we have to use callee's // context id to infer caller's context id to ensure they share the // same context prefix. - SampleContextFrameVector CallerContextId; - SampleContextFrame &&CallerLeafFrameLoc = - getCallerContext(CalleeContextId, CallerContextId); - uint64_t CallerIndex = CallerLeafFrameLoc.Location.LineOffset; + uint64_t CallerIndex = CallerNode->getCallSiteLoc().LineOffset; assert(CallerIndex && "Inferred caller's location index shouldn't be zero!"); + CallerNode = CallerNode->getParentContext()->getOrCreateChildContext( + LineLocation(0, 0), CallerNode->getFuncName()); FunctionSamples &CallerProfile = - getFunctionProfileForContext(CallerContextId); + *getOrCreateFunctionSamples(CallerNode); CallerProfile.setFunctionHash(InlinerDesc->FuncHash); CallerProfile.addBodySamples(CallerIndex, 0, Count); CallerProfile.addTotalSamples(Count); - CallerProfile.addCalledTargetSamples( - CallerIndex, 0, FunctionProfile.getContext().getName(), Count); + CallerProfile.addCalledTargetSamples(CallerIndex, 0, + ContextNode->getFuncName(), Count); } } } @@ -1139,7 +1174,7 @@ } } -FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe( +ContextTrieNode *CSProfileGenerator::getContextNodeForLeafProbe( SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) { // Explicitly copy the context for appending the leaf context @@ -1159,10 +1194,16 @@ const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid()); bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite(); - FunctionSamples &FunctionProile = - getFunctionProfileForContext(NewContextStack, WasLeafInlined); - FunctionProile.setFunctionHash(FuncDesc->FuncHash); - return FunctionProile; + ContextTrieNode *ContextNode = + getOrCreateContextNodeForContext(NewContextStack, WasLeafInlined); + ContextNode->getFunctionSamples()->setFunctionHash(FuncDesc->FuncHash); + return ContextNode; +} + +FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe( + SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) { + return *getContextNodeForLeafProbe(ContextStack, LeafProbe) + ->getFunctionSamples(); } } // end namespace sampleprof