diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h --- a/llvm/include/llvm/MC/MCPseudoProbe.h +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -168,6 +168,8 @@ void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const; }; +using MCPseduoProbeFrameLocation = std::pair; + class MCDecodedPseudoProbe : public MCPseudoProbeBase { uint64_t Address; MCDecodedPseudoProbeInlineTree *InlineTree; @@ -189,13 +191,13 @@ // Get the inlined context by traversing current inline tree backwards, // each tree node has its InlineSite which is taken as the context. // \p ContextStack is populated in root to leaf order - void getInlineContext(SmallVectorImpl &ContextStack, - const GUIDProbeFunctionMap &GUID2FuncMAP, - bool ShowName) const; + void + getInlineContext(SmallVectorImpl &ContextStack, + const GUIDProbeFunctionMap &GUID2FuncMAP) const; // Helper function to get the string from context stack - std::string getInlineContextStr(const GUIDProbeFunctionMap &GUID2FuncMAP, - bool ShowName) const; + std::string + getInlineContextStr(const GUIDProbeFunctionMap &GUID2FuncMAP) const; // Print pseudo probe while disassembling void print(raw_ostream &OS, const GUIDProbeFunctionMap &GUID2FuncMAP, @@ -382,10 +384,10 @@ // Current probe(bar:3) inlined at foo:2 then inlined at main:1 // IncludeLeaf = true, Output: [main:1, foo:2, bar:3] // IncludeLeaf = false, Output: [main:1, foo:2] - void - getInlineContextForProbe(const MCDecodedPseudoProbe *Probe, - SmallVectorImpl &InlineContextStack, - bool IncludeLeaf) const; + void getInlineContextForProbe( + const MCDecodedPseudoProbe *Probe, + SmallVectorImpl &InlineContextStack, + bool IncludeLeaf) const; const AddressProbesMap &getAddress2ProbesMap() const { return Address2ProbesMap; diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp --- a/llvm/lib/MC/MCPseudoProbe.cpp +++ b/llvm/lib/MC/MCPseudoProbe.cpp @@ -221,24 +221,17 @@ } void MCDecodedPseudoProbe::getInlineContext( - 
SmallVectorImpl &ContextStack, - const GUIDProbeFunctionMap &GUID2FuncMAP, bool ShowName) const { + SmallVectorImpl &ContextStack, + const GUIDProbeFunctionMap &GUID2FuncMAP) const { uint32_t Begin = ContextStack.size(); MCDecodedPseudoProbeInlineTree *Cur = InlineTree; // It will add the string of each node's inline site during iteration. // Note that it won't include the probe's belonging function(leaf location) while (Cur->hasInlineSite()) { - std::string ContextStr; - if (ShowName) { - StringRef FuncName = - getProbeFNameForGUID(GUID2FuncMAP, std::get<0>(Cur->ISite)); - ContextStr += FuncName.str(); - } else { - ContextStr += Twine(std::get<0>(Cur->ISite)).str(); - } - ContextStr += ":"; - ContextStr += Twine(std::get<1>(Cur->ISite)).str(); - ContextStack.emplace_back(ContextStr); + StringRef FuncName = + getProbeFNameForGUID(GUID2FuncMAP, std::get<0>(Cur->ISite)); + ContextStack.emplace_back( + MCPseduoProbeFrameLocation(FuncName, std::get<1>(Cur->ISite))); Cur = static_cast(Cur->Parent); } // Make the ContextStack in caller-callee order @@ -246,14 +239,14 @@ } std::string MCDecodedPseudoProbe::getInlineContextStr( - const GUIDProbeFunctionMap &GUID2FuncMAP, bool ShowName) const { + const GUIDProbeFunctionMap &GUID2FuncMAP) const { std::ostringstream OContextStr; - SmallVector ContextStack; - getInlineContext(ContextStack, GUID2FuncMAP, ShowName); - for (auto &CxtStr : ContextStack) { + SmallVector ContextStack; + getInlineContext(ContextStack, GUID2FuncMAP); + for (auto &Cxt : ContextStack) { if (OContextStr.str().size()) OContextStr << " @ "; - OContextStr << CxtStr; + OContextStr << Cxt.first.str() << ":" << Cxt.second; } return OContextStr.str(); } @@ -273,7 +266,7 @@ } OS << "Index: " << Index << " "; OS << "Type: " << PseudoProbeTypeStr[static_cast(Type)] << " "; - std::string InlineContextStr = getInlineContextStr(GUID2FuncMAP, ShowName); + std::string InlineContextStr = getInlineContextStr(GUID2FuncMAP); if (InlineContextStr.size()) { OS << "Inlined: @ 
"; OS << InlineContextStr; @@ -552,15 +545,16 @@ void MCPseudoProbeDecoder::getInlineContextForProbe( const MCDecodedPseudoProbe *Probe, - SmallVectorImpl &InlineContextStack, bool IncludeLeaf) const { - Probe->getInlineContext(InlineContextStack, GUID2FuncDescMap, true); + SmallVectorImpl &InlineContextStack, + bool IncludeLeaf) const { + Probe->getInlineContext(InlineContextStack, GUID2FuncDescMap); if (!IncludeLeaf) return; // Note that the context from probe doesn't include leaf frame, // hence we need to retrieve and prepend leaf if requested. const auto *FuncDesc = getFuncDescForGUID(Probe->getGuid()); - InlineContextStack.emplace_back(FuncDesc->FuncName + ":" + - Twine(Probe->getIndex()).str()); + InlineContextStack.emplace_back( + MCPseduoProbeFrameLocation(FuncDesc->FuncName, Probe->getIndex())); } const MCPseudoProbeFuncDesc *MCPseudoProbeDecoder::getInlinerDescForProbe( diff --git a/llvm/tools/llvm-profgen/CSPreInliner.h b/llvm/tools/llvm-profgen/CSPreInliner.h --- a/llvm/tools/llvm-profgen/CSPreInliner.h +++ b/llvm/tools/llvm-profgen/CSPreInliner.h @@ -67,7 +67,7 @@ // size by only keep context that is estimated to be inlined. class CSPreInliner { public: - CSPreInliner(StringMap &Profiles, uint64_t HotThreshold, + CSPreInliner(SampleProfileMap &Profiles, uint64_t HotThreshold, uint64_t ColdThreshold); void run(); @@ -78,7 +78,7 @@ void processFunction(StringRef Name); bool shouldInline(ProfiledInlineCandidate &Candidate); SampleContextTracker ContextTracker; - StringMap &ProfileMap; + SampleProfileMap &ProfileMap; // Count thresholds to answer isHotCount and isColdCount queries. // Mirrors the threshold in ProfileSummaryInfo. 
diff --git a/llvm/tools/llvm-profgen/CSPreInliner.cpp b/llvm/tools/llvm-profgen/CSPreInliner.cpp --- a/llvm/tools/llvm-profgen/CSPreInliner.cpp +++ b/llvm/tools/llvm-profgen/CSPreInliner.cpp @@ -30,8 +30,8 @@ cl::desc( "Replay previous inlining and adjust context profile accordingly")); -CSPreInliner::CSPreInliner(StringMap &Profiles, - uint64_t HotThreshold, uint64_t ColdThreshold) +CSPreInliner::CSPreInliner(SampleProfileMap &Profiles, uint64_t HotThreshold, + uint64_t ColdThreshold) : ContextTracker(Profiles), ProfileMap(Profiles), HotCountThreshold(HotThreshold), ColdCountThreshold(ColdThreshold) {} @@ -146,11 +146,12 @@ FuncFinalSize += Candidate.SizeCost; getInlineCandidates(CQueue, Candidate.CalleeSamples); } - LLVM_DEBUG(dbgs() << (ShouldInline ? " Inlined" : " Outlined") - << " context profile for: " - << Candidate.CalleeSamples->getNameWithContext() - << " (callee size: " << Candidate.SizeCost - << ", call count:" << Candidate.CallsiteCount << ")\n"); + LLVM_DEBUG( + dbgs() << (ShouldInline ? " Inlined" : " Outlined") + << " context profile for: " + << Candidate.CalleeSamples->getContext().getContextString() + << " (callee size: " << Candidate.SizeCost + << ", call count:" << Candidate.CallsiteCount << ")\n"); } LLVM_DEBUG({ @@ -165,7 +166,8 @@ CQueue.pop(); bool WasInlined = Candidate.CalleeSamples->getContext().hasAttribute(ContextWasInlined); - dbgs() << " " << Candidate.CalleeSamples->getNameWithContext() + dbgs() << " " + << Candidate.CalleeSamples->getContext().getContextString() << " (candidate size:" << Candidate.SizeCost << ", call count: " << Candidate.CallsiteCount << ", previously " << (WasInlined ? "inlined)\n" : "not inlined)\n"); @@ -175,13 +177,12 @@ void CSPreInliner::run() { #ifndef NDEBUG - auto printProfileNames = [](StringMap &Profiles, - bool IsInput) { + auto printProfileNames = [](SampleProfileMap &Profiles, bool IsInput) { dbgs() << (IsInput ? 
"Input" : "Output") << " context-sensitive profiles (" << Profiles.size() << " total):\n"; for (auto &It : Profiles) { const FunctionSamples &Samples = It.second; - dbgs() << " [" << Samples.getNameWithContext() << "] " + dbgs() << " [" << Samples.getContext().getContextString() << "] " << Samples.getTotalSamples() << ":" << Samples.getHeadSamples() << "\n"; } @@ -203,17 +204,17 @@ // Not inlined context profiles are merged into its base, so we can // trim out such profiles from the output. - std::vector ProfilesToBeRemoved; + std::vector ProfilesToBeRemoved; for (auto &It : ProfileMap) { SampleContext Context = It.second.getContext(); if (!Context.isBaseContext() && !Context.hasState(InlinedContext)) { assert(Context.hasState(MergedContext) && "Not inlined context profile should be merged already"); - ProfilesToBeRemoved.push_back(It.first()); + ProfilesToBeRemoved.push_back(It.first); } } - for (StringRef ContextName : ProfilesToBeRemoved) { + for (auto &ContextName : ProfilesToBeRemoved) { ProfileMap.erase(ContextName); } diff --git a/llvm/tools/llvm-profgen/CallContext.h b/llvm/tools/llvm-profgen/CallContext.h --- a/llvm/tools/llvm-profgen/CallContext.h +++ b/llvm/tools/llvm-profgen/CallContext.h @@ -18,24 +18,22 @@ namespace sampleprof { // Function name, LineLocation -typedef std::pair FrameLocation; +using SampleContextStorageType = SampleContextStorageType; -typedef SmallVector FrameLocationStack; - -inline std::string getCallSite(const FrameLocation &Callsite) { - std::string CallsiteStr = Callsite.first; +inline std::string getCallSite(const SampleCallSiteType &Callsite) { + std::string CallsiteStr = Callsite.CallerName.str(); CallsiteStr += ":"; - CallsiteStr += Twine(Callsite.second.LineOffset).str(); - if (Callsite.second.Discriminator > 0) { + CallsiteStr += Twine(Callsite.Callsite.LineOffset).str(); + if (Callsite.Callsite.Discriminator > 0) { CallsiteStr += "."; - CallsiteStr += Twine(Callsite.second.Discriminator).str(); + CallsiteStr += 
Twine(Callsite.Callsite.Discriminator).str(); } return CallsiteStr; } // TODO: This operation is expansive. If it ever gets called multiple times we // may think of making a class wrapper with internal states for it. -inline std::string getLocWithContext(const FrameLocationStack &Context) { +inline std::string getLocWithContext(const SampleContextStorageType &Context) { std::ostringstream OContextStr; for (const auto &Callsite : Context) { if (OContextStr.str().size()) @@ -48,7 +46,7 @@ // Reverse call context, i.e., in the order of callee frames to caller frames, // is useful during instruction printing or pseudo probe printing. inline std::string -getReversedLocWithContext(const FrameLocationStack &Context) { +getReversedLocWithContext(const SampleContextStorageType &Context) { std::ostringstream OContextStr; for (const auto &Callsite : reverse(Context)) { if (OContextStr.str().size()) diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h --- a/llvm/tools/llvm-profgen/PerfReader.h +++ b/llvm/tools/llvm-profgen/PerfReader.h @@ -345,7 +345,8 @@ // String based context id struct StringBasedCtxKey : public ContextKey { - std::string Context; + SampleContextStorageType Context; + bool WasLeafInlined; StringBasedCtxKey() : ContextKey(CK_StringBased), WasLeafInlined(false){}; static bool classof(const ContextKey *K) { @@ -357,7 +358,7 @@ return Context == Other->Context; } - void genHashCode() { HashCode = hash_value(Context); } + void genHashCode() { HashCode = hash_value(SampleContextRefType(Context)); } }; // Probe based context key as the intermediate key of context diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -93,8 +93,7 @@ std::shared_ptr FrameStack::getContextKey() { std::shared_ptr KeyStr = std::make_shared(); - KeyStr->Context = - Binary->getExpandedContextStr(Stack, 
KeyStr->WasLeafInlined); + KeyStr->Context = Binary->getExpandedContext(Stack, KeyStr->WasLeafInlined); if (KeyStr->Context.empty()) return nullptr; KeyStr->genHashCode(); @@ -410,21 +409,17 @@ static std::string getContextKeyStr(ContextKey *K, const ProfiledBinary *Binary) { - std::string ContextStr; if (const auto *CtxKey = dyn_cast(K)) { - return CtxKey->Context; + return SampleContext::getContextString(CtxKey->Context); } else if (const auto *CtxKey = dyn_cast(K)) { - SmallVector ContextStack; + SampleContextStorageType ContextStack; for (const auto *Probe : CtxKey->Probes) { Binary->getInlineContextForProbe(Probe, ContextStack, true); } - for (const auto &Context : ContextStack) { - if (ContextStr.size()) - ContextStr += " @ "; - ContextStr += Context; - } + return SampleContext::getContextString(ContextStack); + } else { + llvm_unreachable("NYI"); } - return ContextStr; } static void printRangeCounter(ContextSampleCounterMap &Counter, diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -33,7 +33,7 @@ virtual void generateProfile() = 0; // Use SampleProfileWriter to serialize profile map virtual void write(std::unique_ptr Writer, - StringMap &ProfileMap); + SampleProfileMap &ProfileMap); void write(); protected: @@ -56,7 +56,7 @@ const RangeSample &Ranges); // Used by SampleProfileWriter - StringMap ProfileMap; + SampleProfileMap ProfileMap; }; class CSProfileGenerator : public ProfileGenerator { @@ -187,32 +187,29 @@ protected: // Lookup or create FunctionSamples for the context - FunctionSamples &getFunctionProfileForContext(StringRef ContextId, + FunctionSamples &getFunctionProfileForContext(SampleContextRefType ContextId, bool WasLeafInlined = false); // Post processing for profiles before writing out, such as mermining // and trimming cold profiles, running preinliner on profiles. 
void postProcessProfiles(); void computeSummaryAndThreshold(); void write(std::unique_ptr Writer, - StringMap &ProfileMap) override; + SampleProfileMap &ProfileMap) override; // Thresholds from profile summary to answer isHotCount/isColdCount queries. uint64_t HotCountThreshold; uint64_t ColdCountThreshold; - // String table owning context strings created from profile generation. - std::unordered_set ContextStrings; - private: // Helper function for updating body sample for a leaf location in // FunctionProfile void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile, - const FrameLocation &LeafLoc, + const SampleCallSiteType &LeafLoc, uint64_t Count); void populateFunctionBodySamples(FunctionSamples &FunctionProfile, const RangeSample &RangeCounters, ProfiledBinary *Binary); - void populateFunctionBoundarySamples(StringRef ContextId, + void populateFunctionBoundarySamples(SampleContextRefType ContextId, FunctionSamples &FunctionProfile, const BranchSample &BranchCounters, ProfiledBinary *Binary); @@ -242,24 +239,22 @@ ProbeCounterMap &ProbeCounter, ProfiledBinary *Binary); // Fill in function body samples from probes - void - populateBodySamplesWithProbes(const RangeSample &RangeCounter, - SmallVectorImpl &ContextStrStack, - ProfiledBinary *Binary); + void populateBodySamplesWithProbes(const RangeSample &RangeCounter, + SampleContextRefType ContextStack, + ProfiledBinary *Binary); // Fill in boundary samples for a call probe - void populateBoundarySamplesWithProbes( - const BranchSample &BranchCounter, - SmallVectorImpl &ContextStrStack, ProfiledBinary *Binary); - // Helper function to get FunctionSamples for the leaf inlined context - FunctionSamples & - getFunctionProfileForLeafProbe(SmallVectorImpl &ContextStrStack, - const MCPseudoProbeFuncDesc *LeafFuncDesc, - bool WasLeafInlined); + void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter, + SampleContextRefType ContextStack, + ProfiledBinary *Binary); // Helper function to 
get FunctionSamples for the leaf probe FunctionSamples & - getFunctionProfileForLeafProbe(SmallVectorImpl &ContextStrStack, + getFunctionProfileForLeafProbe(SampleContextRefType ContextStrStack, const MCDecodedPseudoProbe *LeafProbe, ProfiledBinary *Binary); + + // Underlying context table serves for sample profile writer. + std::unordered_set + Contexts; }; } // end namespace sampleprof diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -99,7 +99,7 @@ } void ProfileGenerator::write(std::unique_ptr Writer, - StringMap &ProfileMap) { + SampleProfileMap &ProfileMap) { if (std::error_code EC = Writer->write(ProfileMap)) exitWithError(std::move(EC)); } @@ -210,19 +210,16 @@ } FunctionSamples & -CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr, +CSProfileGenerator::getFunctionProfileForContext(SampleContextRefType Context, bool WasLeafInlined) { - auto Ret = ProfileMap.try_emplace(ContextStr, FunctionSamples()); + SampleContext FContext(Context); + auto Ret = ProfileMap.emplace(Context, FunctionSamples()); if (Ret.second) { - // Make a copy of the underlying context string in string table - // before StringRef wrapper is used for context. 
- auto It = ContextStrings.insert(ContextStr.str()); - SampleContext FContext(*It.first, RawContext); + SampleContext FContext(Context, RawContext); if (WasLeafInlined) FContext.setAttribute(ContextWasInlined); FunctionSamples &FProfile = Ret.first->second; FProfile.setContext(FContext); - FProfile.setName(FContext.getNameWithoutContext()); } return Ret.first->second; } @@ -234,16 +231,15 @@ for (const auto &CI : BI.second) { const StringBasedCtxKey *CtxKey = dyn_cast(CI.first.getPtr()); - StringRef ContextId(CtxKey->Context); // Get or create function profile for the range FunctionSamples &FunctionProfile = - getFunctionProfileForContext(ContextId, CtxKey->WasLeafInlined); + getFunctionProfileForContext(CtxKey->Context, CtxKey->WasLeafInlined); // Fill in function body samples populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter, Binary); // Fill in boundary sample counts as well as call site samples for calls - populateFunctionBoundarySamples(ContextId, FunctionProfile, + populateFunctionBoundarySamples(CtxKey->Context, FunctionProfile, CI.second.BranchCounter, Binary); } } @@ -257,18 +253,18 @@ } void CSProfileGenerator::updateBodySamplesforFunctionProfile( - FunctionSamples &FunctionProfile, const FrameLocation &LeafLoc, + FunctionSamples &FunctionProfile, const SampleCallSiteType &LeafLoc, uint64_t Count) { // Filter out invalid negative(int type) lineOffset - if (LeafLoc.second.LineOffset & 0x80000000) + if (LeafLoc.Callsite.LineOffset & 0x80000000) return; // Use the maximum count of samples with same line location ErrorOr R = FunctionProfile.findSamplesAt( - LeafLoc.second.LineOffset, LeafLoc.second.Discriminator); + LeafLoc.Callsite.LineOffset, LeafLoc.Callsite.Discriminator); uint64_t PreviousCount = R ? 
R.get() : 0; if (PreviousCount < Count) { - FunctionProfile.addBodySamples(LeafLoc.second.LineOffset, - LeafLoc.second.Discriminator, + FunctionProfile.addBodySamples(LeafLoc.Callsite.LineOffset, + LeafLoc.Callsite.Discriminator, Count - PreviousCount); } } @@ -313,7 +309,7 @@ } void CSProfileGenerator::populateFunctionBoundarySamples( - StringRef ContextId, FunctionSamples &FunctionProfile, + SampleContextRefType ContextId, FunctionSamples &FunctionProfile, const BranchSample &BranchCounters, ProfiledBinary *Binary) { for (auto Entry : BranchCounters) { @@ -330,44 +326,35 @@ auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset); if (!LeafLoc.hasValue()) continue; - FunctionProfile.addCalledTargetSamples(LeafLoc->second.LineOffset, - LeafLoc->second.Discriminator, + FunctionProfile.addCalledTargetSamples(LeafLoc->Callsite.LineOffset, + LeafLoc->Callsite.Discriminator, CalleeName, Count); // Record head sample for called target(callee) - std::ostringstream OCalleeCtxStr; - if (ContextId.find(" @ ") != StringRef::npos) { - OCalleeCtxStr << ContextId.rsplit(" @ ").first.str(); - OCalleeCtxStr << " @ "; - } - OCalleeCtxStr << getCallSite(*LeafLoc) << " @ " << CalleeName.str(); - - FunctionSamples &CalleeProfile = - getFunctionProfileForContext(OCalleeCtxStr.str()); + SampleContextStorageType CalleeCtx(ContextId.begin(), ContextId.end()); + assert(CalleeCtx.back().CallerName == LeafLoc->CallerName); + CalleeCtx.back() = *LeafLoc; + CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0)); + FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx); assert(Count != 0 && "Unexpected zero weight branch"); CalleeProfile.addHeadSamples(Count); } } -static FrameLocation getCallerContext(StringRef CalleeContext, - StringRef &CallerNameWithContext) { - StringRef CallerContext = CalleeContext.rsplit(" @ ").first; - CallerNameWithContext = CallerContext.rsplit(':').first; - auto ContextSplit = CallerContext.rsplit(" @ "); - StringRef CallerFrameStr = 
ContextSplit.second.size() == 0 - ? ContextSplit.first - : ContextSplit.second; - FrameLocation LeafFrameLoc = {"", {0, 0}}; - StringRef Funcname; - SampleContext::decodeContextString(CallerFrameStr, Funcname, - LeafFrameLoc.second); - LeafFrameLoc.first = Funcname.str(); - return LeafFrameLoc; +static SampleCallSiteType +getCallerContext(SampleContextRefType CalleeContext, + SampleContextStorageType &CallerContext) { + assert(CalleeContext.size() > 1); + CalleeContext = CalleeContext.drop_back(); + CallerContext.assign(CalleeContext.begin(), CalleeContext.end()); + SampleCallSiteType CallerFrame = CallerContext.back(); + CallerContext.back().Callsite = LineLocation(0, 0); + return CallerFrame; } void CSProfileGenerator::populateInferredFunctionSamples() { for (const auto &Item : ProfileMap) { - const StringRef CalleeContext = Item.first(); + const auto &CalleeContext = Item.first; const FunctionSamples &CalleeProfile = Item.second; // If we already have head sample counts, we must have value profile @@ -376,13 +363,13 @@ continue; // If we don't have context, nothing to do for caller's call site. // This could happen for entry point function. - if (CalleeContext.find(" @ ") == StringRef::npos) + if (CalleeContext.isBaseContext()) continue; // Infer Caller's frame loc and context ID through string splitting - StringRef CallerContextId; - FrameLocation &&CallerLeafFrameLoc = - getCallerContext(CalleeContext, CallerContextId); + SampleContextStorageType CallerContextId; + SampleCallSiteType &&CallerLeafFrameLoc = + getCallerContext(CalleeContext.getFullContext(), CallerContextId); // It's possible that we haven't seen any sample directly in the caller, // in which case CallerProfile will not exist. 
But we can't modify @@ -399,11 +386,11 @@ if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size()) EstimatedCallCount = 1; CallerProfile.addCalledTargetSamples( - CallerLeafFrameLoc.second.LineOffset, - CallerLeafFrameLoc.second.Discriminator, + CallerLeafFrameLoc.Callsite.LineOffset, + CallerLeafFrameLoc.Callsite.Discriminator, CalleeProfile.getContext().getNameWithoutContext(), EstimatedCallCount); - CallerProfile.addBodySamples(CallerLeafFrameLoc.second.LineOffset, - CallerLeafFrameLoc.second.Discriminator, + CallerProfile.addBodySamples(CallerLeafFrameLoc.Callsite.LineOffset, + CallerLeafFrameLoc.Callsite.Discriminator, EstimatedCallCount); CallerProfile.addTotalSamples(EstimatedCallCount); } @@ -442,7 +429,7 @@ } void CSProfileGenerator::write(std::unique_ptr Writer, - StringMap &ProfileMap) { + SampleProfileMap &ProfileMap) { if (std::error_code EC = Writer->write(ProfileMap)) exitWithError(std::move(EC)); } @@ -450,12 +437,12 @@ // Helper function to extract context prefix string stack // Extract context stack for reusing, leaf context stack will // be added compressed while looking up function profile -static void -extractPrefixContextStack(SmallVectorImpl &ContextStrStack, +static void extractPrefixContextStack( + SampleContextStorageType &ContextStack, const SmallVectorImpl &Probes, ProfiledBinary *Binary) { for (const auto *P : Probes) { - Binary->getInlineContextForProbe(P, ContextStrStack, true); + Binary->getInlineContextForProbe(P, ContextStack, true); } } @@ -468,15 +455,15 @@ for (const auto &CI : BI.second) { const ProbeBasedCtxKey *CtxKey = dyn_cast(CI.first.getPtr()); - SmallVector ContextStrStack; - extractPrefixContextStack(ContextStrStack, CtxKey->Probes, Binary); + SampleContextStorageType ContextStack; + extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary); // Fill in function body samples from probes, also infer caller's samples // from callee's probe - populateBodySamplesWithProbes(CI.second.RangeCounter, 
ContextStrStack, + populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack, Binary); // Fill in boundary samples for a call probe - populateBoundarySamplesWithProbes(CI.second.BranchCounter, - ContextStrStack, Binary); + populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack, + Binary); } } @@ -523,8 +510,8 @@ } void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes( - const RangeSample &RangeCounter, - SmallVectorImpl &ContextStrStack, ProfiledBinary *Binary) { + const RangeSample &RangeCounter, SampleContextRefType ContextStack, + ProfiledBinary *Binary) { ProbeCounterMap ProbeCounter; // Extract the top frame probes by looking up each address among the range in // the Address2ProbeMap @@ -536,7 +523,7 @@ const MCDecodedPseudoProbe *Probe = PI.first; uint64_t Count = PI.second; FunctionSamples &FunctionProfile = - getFunctionProfileForLeafProbe(ContextStrStack, Probe, Binary); + getFunctionProfileForLeafProbe(ContextStack, Probe, Binary); // Record the current frame and FunctionProfile whenever samples are // collected for non-danglie probes. This is for reporting all of the // zero count probes of the frame later. @@ -551,16 +538,19 @@ // Since the context id will be compressed, we have to use callee's // context id to infer caller's context id to ensure they share the // same context prefix. - StringRef CalleeContextId = - FunctionProfile.getContext().getNameWithContext(); - StringRef CallerContextId; - FrameLocation &&CallerLeafFrameLoc = + SampleContextRefType CalleeContextId = + FunctionProfile.getContext().getFullContext(); + SampleContextStorageType CallerContextId; + SampleCallSiteType &&CallerLeafFrameLoc = getCallerContext(CalleeContextId, CallerContextId); - uint64_t CallerIndex = CallerLeafFrameLoc.second.LineOffset; + uint64_t CallerIndex = CallerLeafFrameLoc.Callsite.LineOffset; assert(CallerIndex && "Inferred caller's location index shouldn't be zero!"); + // Save the new context for future references. 
+ SampleContextRefType CallerContextRef = + *Contexts.insert(CallerContextId).first; FunctionSamples &CallerProfile = - getFunctionProfileForContext(CallerContextId); + getFunctionProfileForContext(CallerContextRef); CallerProfile.setFunctionHash(InlinerDesc->FuncHash); CallerProfile.addBodySamples(CallerIndex, 0, Count); CallerProfile.addTotalSamples(Count); @@ -584,8 +574,8 @@ } void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes( - const BranchSample &BranchCounter, - SmallVectorImpl &ContextStrStack, ProfiledBinary *Binary) { + const BranchSample &BranchCounter, SampleContextRefType ContextStack, + ProfiledBinary *Binary) { for (auto BI : BranchCounter) { uint64_t SourceOffset = BI.first.first; uint64_t TargetOffset = BI.first.second; @@ -596,7 +586,7 @@ if (CallProbe == nullptr) continue; FunctionSamples &FunctionProfile = - getFunctionProfileForLeafProbe(ContextStrStack, CallProbe, Binary); + getFunctionProfileForLeafProbe(ContextStack, CallProbe, Binary); FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count); FunctionProfile.addTotalSamples(Count); StringRef CalleeName = FunctionSamples::getCanonicalFnName( @@ -609,46 +599,32 @@ } FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe( - SmallVectorImpl &ContextStrStack, - const MCPseudoProbeFuncDesc *LeafFuncDesc, bool WasLeafInlined) { - assert(ContextStrStack.size() && "Profile context must have the leaf frame"); - // Compress the context string except for the leaf frame - std::string LeafFrame = ContextStrStack.back(); - ContextStrStack.pop_back(); - CSProfileGenerator::compressRecursionContext(ContextStrStack); - CSProfileGenerator::trimContext(ContextStrStack); - - std::ostringstream OContextStr; - for (uint32_t I = 0; I < ContextStrStack.size(); I++) { - if (OContextStr.str().size()) - OContextStr << " @ "; - OContextStr << ContextStrStack[I]; - } + SampleContextRefType ContextStack, const MCDecodedPseudoProbe *LeafProbe, + ProfiledBinary *Binary) { 
+ + // Explicitly copy the context for appending the leaf context + SampleContextStorageType NewContextStack(ContextStack.begin(), + ContextStack.end()); + Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true); // For leaf inlined context with the top frame, we should strip off the top // frame's probe id, like: // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" - if (OContextStr.str().size()) - OContextStr << " @ "; - OContextStr << StringRef(LeafFrame).split(":").first.str(); - - FunctionSamples &FunctionProile = - getFunctionProfileForContext(OContextStr.str(), WasLeafInlined); - FunctionProile.setFunctionHash(LeafFuncDesc->FuncHash); - return FunctionProile; -} - -FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe( - SmallVectorImpl &ContextStrStack, - const MCDecodedPseudoProbe *LeafProbe, ProfiledBinary *Binary) { + auto LeafFrame = NewContextStack.back(); + LeafFrame.Callsite = LineLocation(0, 0); + NewContextStack.pop_back(); + // Compress the context string except for the leaf frame + CSProfileGenerator::compressRecursionContext(NewContextStack); + CSProfileGenerator::trimContext(NewContextStack); + NewContextStack.push_back(LeafFrame); + // Save the new context for future references. 
+ SampleContextRefType NewContextRef = *Contexts.insert(NewContextStack).first; - // Explicitly copy the context for appending the leaf context - SmallVector ContextStrStackCopy(ContextStrStack.begin(), - ContextStrStack.end()); - Binary->getInlineContextForProbe(LeafProbe, ContextStrStackCopy, true); const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid()); bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite(); - return getFunctionProfileForLeafProbe(ContextStrStackCopy, FuncDesc, - WasLeafInlined); + FunctionSamples &FunctionProile = + getFunctionProfileForContext(NewContextRef, WasLeafInlined); + FunctionProile.setFunctionHash(FuncDesc->FuncHash); + return FunctionProile; } } // end namespace sampleprof diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -121,7 +121,7 @@ // Function offset to name mapping. std::unordered_map FuncStartAddrMap; // Offset to context location map. Used to expand the context. - std::unordered_map Offset2LocStackMap; + std::unordered_map Offset2LocStackMap; // An array of offsets of all instructions sorted in increasing order. The // sorting is needed to fast advance to the next forward/backward instruction. std::vector CodeAddrs; @@ -135,6 +135,9 @@ // The symbolizer used to get inline context for an instruction. std::unique_ptr Symbolizer; + // String table owning function name strings created from the symbolizer. + std::unordered_set NameStrings; + // Pseudo probe decoder MCPseudoProbeDecoder ProbeDecoder; @@ -164,8 +167,8 @@ bool dissassembleSymbol(std::size_t SI, ArrayRef Bytes, SectionSymbolsTy &Symbols, const SectionRef &Section); /// Symbolize a given instruction pointer and return a full call context. 
- FrameLocationStack symbolize(const InstructionPointer &IP, - bool UseCanonicalFnName = false); + SampleContextStorageType symbolize(const InstructionPointer &IP, + bool UseCanonicalFnName = false); /// Decode the interesting parts of the binary and build internal data /// structures. On high level, the parts of interest are: @@ -175,7 +178,7 @@ /// 3. Pseudo probe related sections, used by probe-based profile /// generation. void load(); - const FrameLocationStack &getFrameLocationStack(uint64_t Offset) const { + const SampleContextStorageType &getFrameLocationStack(uint64_t Offset) const { auto I = Offset2LocStackMap.find(Offset); assert(I != Offset2LocStackMap.end() && "Can't find location for offset in the binary"); @@ -249,7 +252,7 @@ return FuncStartAddrMap[Offset]; } - Optional getInlineLeafFrameLoc(uint64_t Offset) { + Optional getInlineLeafFrameLoc(uint64_t Offset) { const auto &Stack = getFrameLocationStack(Offset); if (Stack.empty()) return {}; @@ -259,22 +262,27 @@ // Compare two addresses' inline context bool inlineContextEqual(uint64_t Add1, uint64_t Add2) const; - // Get the context string of the current stack with inline context filled in. + // Get the full context of the current stack with inline context filled in. // It will search the disassembling info stored in Offset2LocStackMap. 
This is // used as the key of function sample map - std::string getExpandedContextStr(const SmallVectorImpl &Stack, - bool &WasLeafInlined) const; + SampleContextStorageType + getExpandedContext(const SmallVectorImpl &Stack, + bool &WasLeafInlined) const; const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const { return ProbeDecoder.getCallProbeForAddr(Address); } - void - getInlineContextForProbe(const MCDecodedPseudoProbe *Probe, - SmallVectorImpl &InlineContextStack, - bool IncludeLeaf = false) const { - return ProbeDecoder.getInlineContextForProbe(Probe, InlineContextStack, - IncludeLeaf); + void getInlineContextForProbe(const MCDecodedPseudoProbe *Probe, + SampleContextStorageType &InlineContextStack, + bool IncludeLeaf = false) const { + SmallVector ProbeInlineContext; + ProbeDecoder.getInlineContextForProbe(Probe, ProbeInlineContext, + IncludeLeaf); + for (auto &Callsite : ProbeInlineContext) { + InlineContextStack.push_back( + SampleCallSiteType(Callsite.first, LineLocation(Callsite.second, 0))); + } } const AddressProbesMap &getAddress2ProbesMap() const { return ProbeDecoder.getAddress2ProbesMap(); diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -87,8 +87,8 @@ uint64_t Address2) const { uint64_t Offset1 = virtualAddrToOffset(Address1); uint64_t Offset2 = virtualAddrToOffset(Address2); - const FrameLocationStack &Context1 = getFrameLocationStack(Offset1); - const FrameLocationStack &Context2 = getFrameLocationStack(Offset2); + const SampleContextStorageType &Context1 = getFrameLocationStack(Offset1); + const SampleContextStorageType &Context2 = getFrameLocationStack(Offset2); if (Context1.size() != Context2.size()) return false; if (Context1.empty()) @@ -99,46 +99,34 @@ Context2.begin(), Context2.begin() + Context2.size() - 1); } -std::string -ProfiledBinary::getExpandedContextStr(const 
SmallVectorImpl &Stack, - bool &WasLeafInlined) const { - std::string ContextStr; - SmallVector ContextVec; +SampleContextStorageType +ProfiledBinary::getExpandedContext(const SmallVectorImpl &Stack, + bool &WasLeafInlined) const { + SampleContextStorageType ContextVec; // Process from frame root to leaf for (auto Address : Stack) { uint64_t Offset = virtualAddrToOffset(Address); - const FrameLocationStack &ExpandedContext = getFrameLocationStack(Offset); + const SampleContextStorageType &ExpandedContext = + getFrameLocationStack(Offset); // An instruction without a valid debug line will be ignored by sample // processing if (ExpandedContext.empty()) - return std::string(); + return SampleContextStorageType(); - // Set WasLeafInlined to the size of inlined frame count for the last - // address which is leaf + // WasLeafInlined is overwritten per address, so it ends up describing the + // last (leaf) address: true when it expands to more than one frame WasLeafInlined = (ExpandedContext.size() > 1); - for (const auto &Loc : ExpandedContext) { - ContextVec.push_back(getCallSite(Loc)); - } + ContextVec.append(ExpandedContext); } assert(ContextVec.size() && "Context length should be at least 1"); - // Compress the context string except for the leaf frame - std::string LeafFrame = ContextVec.back(); + // Compress the context except for the leaf frame + auto LeafFrame = ContextVec.back(); + LeafFrame.Callsite = LineLocation(0, 0); ContextVec.pop_back(); - CSProfileGenerator::compressRecursionContext(ContextVec); - CSProfileGenerator::trimContext(ContextVec); - - std::ostringstream OContextStr; - for (uint32_t I = 0; I < (uint32_t)ContextVec.size(); I++) { - if (OContextStr.str().size()) { - OContextStr << " @ "; - } - OContextStr << ContextVec[I]; - } - // Only keep the function name for the leaf frame - if (OContextStr.str().size()) - OContextStr << " @ "; - OContextStr << StringRef(LeafFrame).split(":").first.str(); - return OContextStr.str(); + CSProfileGenerator::compressRecursionContext(ContextVec); + CSProfileGenerator::trimContext(ContextVec); + ContextVec.push_back(LeafFrame); + return
ContextVec; } template @@ -263,7 +251,7 @@ // Populate a vector of the symbolized callsite at this location // We don't need symbolized info for probe-based profile, just use an // empty stack as an entry to indicate a valid binary offset - FrameLocationStack SymbolizedCallStack; + SampleContextStorageType SymbolizedCallStack; if (!UsePseudoProbes) { InstructionPointer IP(this, Offset); SymbolizedCallStack = symbolize(IP, true); @@ -410,8 +398,8 @@ Symbolizer = std::make_unique(SymbolizerOpts); } -FrameLocationStack ProfiledBinary::symbolize(const InstructionPointer &IP, - bool UseCanonicalFnName) { +SampleContextStorageType ProfiledBinary::symbolize(const InstructionPointer &IP, + bool UseCanonicalFnName) { assert(this == IP.Binary && "Binary should only symbolize its own instruction"); auto Addr = object::SectionedAddress{IP.Offset + getPreferredBaseAddress(), @@ -419,7 +407,7 @@ DIInliningInfo InlineStack = unwrapOrError(Symbolizer->symbolizeInlinedCode(Path, Addr), getName()); - FrameLocationStack CallStack; + SampleContextStorageType CallStack; for (int32_t I = InlineStack.getNumberOfFrames() - 1; I >= 0; I--) { const auto &CallerFrame = InlineStack.getFrame(I); @@ -432,7 +420,8 @@ DILocation::getBaseDiscriminatorFromDiscriminator( CallerFrame.Discriminator, /* IsFSDiscriminator */ false)); - FrameLocation Callsite(FunctionName.str(), Line); + auto It = NameStrings.insert(FunctionName.str()); + SampleCallSiteType Callsite(*It.first, Line); CallStack.push_back(Callsite); }