diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h --- a/llvm/include/llvm/MC/MCPseudoProbe.h +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -168,6 +168,9 @@ void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const; }; +// Represents a callsite with caller function name and probe id +using MCPseduoProbeFrameLocation = std::pair; + class MCDecodedPseudoProbe : public MCPseudoProbeBase { uint64_t Address; MCDecodedPseudoProbeInlineTree *InlineTree; @@ -189,13 +192,13 @@ // Get the inlined context by traversing current inline tree backwards, // each tree node has its InlineSite which is taken as the context. // \p ContextStack is populated in root to leaf order - void getInlineContext(SmallVectorImpl &ContextStack, - const GUIDProbeFunctionMap &GUID2FuncMAP, - bool ShowName) const; + void + getInlineContext(SmallVectorImpl &ContextStack, + const GUIDProbeFunctionMap &GUID2FuncMAP) const; // Helper function to get the string from context stack - std::string getInlineContextStr(const GUIDProbeFunctionMap &GUID2FuncMAP, - bool ShowName) const; + std::string + getInlineContextStr(const GUIDProbeFunctionMap &GUID2FuncMAP) const; // Print pseudo probe while disassembling void print(raw_ostream &OS, const GUIDProbeFunctionMap &GUID2FuncMAP, @@ -381,10 +384,10 @@ // Current probe(bar:3) inlined at foo:2 then inlined at main:1 // IncludeLeaf = true, Output: [main:1, foo:2, bar:3] // IncludeLeaf = false, Output: [main:1, foo:2] - void - getInlineContextForProbe(const MCDecodedPseudoProbe *Probe, - SmallVectorImpl &InlineContextStack, - bool IncludeLeaf) const; + void getInlineContextForProbe( + const MCDecodedPseudoProbe *Probe, + SmallVectorImpl &InlineContextStack, + bool IncludeLeaf) const; const AddressProbesMap &getAddress2ProbesMap() const { return Address2ProbesMap; diff --git a/llvm/include/llvm/ProfileData/ProfileCommon.h b/llvm/include/llvm/ProfileData/ProfileCommon.h --- a/llvm/include/llvm/ProfileData/ProfileCommon.h +++ b/llvm/include/llvm/ProfileData/ProfileCommon.h @@ -92,8 +92,8 @@ void addRecord(const sampleprof::FunctionSamples &FS, bool isCallsiteSample = false); - std::unique_ptr computeSummaryForProfiles( - const StringMap &Profiles); + std::unique_ptr + computeSummaryForProfiles(const sampleprof::SampleProfileMap &Profiles); std::unique_ptr getSummary(); }; diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -29,10 +29,13 @@ #include "llvm/Support/raw_ostream.h" #include #include +#include #include #include +#include #include #include +#include #include namespace llvm { @@ -122,6 +125,7 @@ SecProfileSymbolList = 3, SecFuncOffsetTable = 4, SecFuncMetadata = 5, + SecCSNameTable = 6, // marker for the first type of profile. SecFuncProfileFirst = 32, SecLBRProfile = SecFuncProfileFirst @@ -141,6 +145,8 @@ return "FuncOffsetTableSection"; case SecFuncMetadata: return "FunctionMetadata"; + case SecCSNameTable: + return "CSNameTableSection"; case SecLBRProfile: return "LBRProfileSection"; } @@ -396,64 +402,119 @@ ContextShouldBeInlined = 0x2, // Leaf of context should be inlined }; +// Represents a callsite with caller function name and line location +struct SampleContextFrame { + StringRef CallerName; + LineLocation Callsite; + + SampleContextFrame() : Callsite(0, 0) {} + + SampleContextFrame(StringRef CallerName, LineLocation Callsite) + : CallerName(CallerName), Callsite(Callsite) {} + + bool operator==(const SampleContextFrame &That) const { + return Callsite == That.Callsite && CallerName == That.CallerName; + } + + bool operator!=(const SampleContextFrame &That) const { + return !(*this == That); + } + + std::string toString(bool OutputLineLocation) const { + std::ostringstream OContextStr; + OContextStr << CallerName.str(); + if (OutputLineLocation) { + OContextStr << ":" << Callsite.LineOffset; + if (Callsite.Discriminator) + OContextStr << "." << Callsite.Discriminator; + } + return OContextStr.str(); + } +}; + +static inline hash_code hash_value(const SampleContextFrame &arg) { + return hash_combine(arg.CallerName, arg.Callsite.LineOffset, + arg.Callsite.Discriminator); +} + +using SampleContextFrameVector = SmallVector; +using SampleContextFrames = ArrayRef; + +struct SampleContextFrameHash { + uint64_t operator()(const SampleContextFrameVector &S) const { + return hash_combine_range(S.begin(), S.end()); + } +}; + // Sample context for FunctionSamples. It consists of the calling context, // the function name and context state. Internally sample context is represented -// using StringRef, which is also the input for constructing a `SampleContext`. +// using ArrayRef, which is also the input for constructing a `SampleContext`. // It can accept and represent both full context string as well as context-less // function name. -// Example of full context string (note the wrapping `[]`): -// `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]` -// Example of context-less function name (same as AutoFDO): -// `_Z8funcLeafi` +// For a CS profile, a full context vector can look like: +// `main:3 _Z5funcAi:1 _Z8funcLeafi` +// For a base CS profile without calling context, the context vector should only +// contain the leaf frame name. +// For a non-CS profile, the context vector should be empty. class SampleContext { public: SampleContext() : State(UnknownContext), Attributes(ContextNone) {} - SampleContext(StringRef ContextStr, ContextStateMask CState = UnknownContext) - : Attributes(ContextNone) { - setContext(ContextStr, CState); - } - // Promote context by removing top frames (represented by `ContextStrToRemove`). - // Note that with string representation of context, the promotion is effectively - // a substr operation with `ContextStrToRemove` removed from left. - void promoteOnPath(StringRef ContextStrToRemove) { - assert(FullContext.startswith(ContextStrToRemove)); + SampleContext(StringRef Name) + : Name(Name), State(UnknownContext), Attributes(ContextNone) {} - // Remove leading context and frame separator " @ ". - FullContext = FullContext.substr(ContextStrToRemove.size() + 3); - CallingContext = CallingContext.substr(ContextStrToRemove.size() + 3); + SampleContext(SampleContextFrames Context, + ContextStateMask CState = RawContext) + : Attributes(ContextNone) { + assert(!Context.empty() && "Context is empty"); + setContext(Context, CState); } - // Split the top context frame (left-most substr) from context. - static std::pair - splitContextString(StringRef ContextStr) { - return ContextStr.split(" @ "); - } + // Give a context string, decode and populate internal states like + // Function name, Calling context and context state. Example of input + // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]` + SampleContext(StringRef ContextStr, + std::list &CSNameTable, + ContextStateMask CState = RawContext) + : Attributes(ContextNone) { + assert(!ContextStr.empty()); + // Note that `[]` wrapped input indicates a full context string, otherwise + // it's treated as context-less function name only. + bool HasContext = ContextStr.startswith("["); + if (!HasContext) { + State = UnknownContext; + Name = ContextStr; + } else { + // Remove encapsulating '[' and ']' if any + ContextStr = ContextStr.substr(1, ContextStr.size() - 2); + CSNameTable.emplace_back(); + SampleContextFrameVector &Context = CSNameTable.back(); + /// Create a context vector from a given context string and save it in + /// `Context`. + StringRef ContextRemain = ContextStr; + StringRef ChildContext; + StringRef CalleeName; + while (!ContextRemain.empty()) { + auto ContextSplit = ContextRemain.split(" @ "); + ChildContext = ContextSplit.first; + ContextRemain = ContextSplit.second; + LineLocation CallSiteLoc(0, 0); + decodeContextString(ChildContext, CalleeName, CallSiteLoc); + Context.emplace_back(CalleeName, CallSiteLoc); + } - // Split the leaf context frame (right-most substr) from context. - static std::pair - rsplitContextString(StringRef ContextStr) { - auto ContextSplit = ContextStr.rsplit(" @ "); - if (ContextSplit.second.empty()) { - std::swap(ContextSplit.first, ContextSplit.second); + setContext(Context, CState); } - return ContextSplit; } - // Reconstruct a new context with the last k frames, return the context-less - // name if K = 1 - StringRef getContextWithLastKFrames(uint32_t K) { - if (K == 1) - return getNameWithoutContext(); - - size_t I = FullContext.size(); - while (K--) { - I = FullContext.find_last_of(" @ ", I); - if (I == StringRef::npos) - return FullContext; - I -= 2; - } - return FullContext.slice(I + 3, StringRef::npos); + // Promote context by removing top frames with the length of + // `ContextFramesToRemove`. Note that with array representation of context, + // the promotion is effectively a slice operation with first + // `ContextFramesToRemove` elements removed from left. + void promoteOnPath(uint32_t ContextFramesToRemove) { + assert(ContextFramesToRemove <= FullContext.size() && + "Cannot remove more than the whole context"); + FullContext = FullContext.drop_front(ContextFramesToRemove); } // Decode context string for a frame to get function name and location. @@ -479,7 +540,7 @@ } } - operator StringRef() const { return FullContext; } + operator SampleContextFrames() const { return FullContext; } bool hasAttribute(ContextAttributeMask A) { return Attributes & (uint32_t)A; } void setAttribute(ContextAttributeMask A) { Attributes |= (uint32_t)A; } uint32_t getAllAttributes() { return Attributes; } @@ -488,60 +549,114 @@ void setState(ContextStateMask S) { State |= (uint32_t)S; } void clearState(ContextStateMask S) { State &= (uint32_t)~S; } bool hasContext() const { return State != UnknownContext; } - bool isBaseContext() const { return CallingContext.empty(); } - StringRef getNameWithoutContext() const { return Name; } - StringRef getCallingContext() const { return CallingContext; } - StringRef getNameWithContext() const { return FullContext; } + bool isBaseContext() const { return FullContext.size() == 1; } + StringRef getName() const { return Name; } + SampleContextFrames getContextFrames() const { return FullContext; } + + static std::string getContextString(SampleContextFrames Context, + bool IncludeLeafLineLocation = false) { + std::ostringstream OContextStr; + for (uint32_t I = 0; I < Context.size(); I++) { + if (OContextStr.str().size()) { + OContextStr << " @ "; + } + OContextStr << Context[I].toString(I != Context.size() - 1 || + IncludeLeafLineLocation); + } + return OContextStr.str(); + } -private: - // Give a context string, decode and populate internal states like - // Function name, Calling context and context state. Example of input - // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]` - void setContext(StringRef ContextStr, ContextStateMask CState) { - assert(!ContextStr.empty()); - // Note that `[]` wrapped input indicates a full context string, otherwise - // it's treated as context-less function name only. - bool HasContext = ContextStr.startswith("["); - if (!HasContext && CState == UnknownContext) { - State = UnknownContext; - Name = FullContext = ContextStr; - } else { - // Assume raw context profile if unspecified - if (CState == UnknownContext) - State = RawContext; - else - State = CState; + std::string toString() const { + if (!hasContext()) + return Name.str(); + return getContextString(FullContext, false); + } - // Remove encapsulating '[' and ']' if any - if (HasContext) - FullContext = ContextStr.substr(1, ContextStr.size() - 2); - else - FullContext = ContextStr; - - // Caller is to the left of callee in context string - auto NameContext = FullContext.rsplit(" @ "); - if (NameContext.second.empty()) { - Name = NameContext.first; - CallingContext = NameContext.second; - } else { - Name = NameContext.second; - CallingContext = NameContext.first; - } + uint64_t getHashCode() const { + return hasContext() ? hash_value(getContextFrames()) + : hash_value(getName()); + } + + /// Set the name of the function. + void setName(StringRef FunctionName) { + assert(FullContext.empty() && + "setName should only be called for non-CS profile"); + Name = FunctionName; + } + + void setContext(SampleContextFrames Context, + ContextStateMask CState = RawContext) { + assert(CState != UnknownContext); + FullContext = Context; + Name = Context.back().CallerName; + State = CState; + } + + bool operator==(const SampleContext &That) const { + return State == That.State && Name == That.Name && + FullContext == That.FullContext; + } + + bool operator!=(const SampleContext &That) const { return !(*this == That); } + + bool operator<(const SampleContext &That) const { + if (State != That.State) + return State < That.State; + + if (!hasContext()) { + return (Name.compare(That.Name)) == -1; + } + + uint64_t I = 0; + while (I < std::min(FullContext.size(), That.FullContext.size())) { + auto &Context1 = FullContext[I]; + auto &Context2 = That.FullContext[I]; + auto V = Context1.CallerName.compare(Context2.CallerName); + if (V) + return V == -1; + if (Context1.Callsite != Context2.Callsite) + return Context1.Callsite < Context2.Callsite; + I++; + } + + return FullContext.size() < That.FullContext.size(); + } + + struct Hash { + uint64_t operator()(const SampleContext &Context) const { + return Context.getHashCode(); } + }; + + bool IsPrefixOf(const SampleContext &That) const { + auto ThisContext = FullContext; + auto ThatContext = That.FullContext; + if (ThatContext.size() < ThisContext.size()) + return false; + ThatContext = ThatContext.take_front(ThisContext.size()); + // Compare Leaf frame first + if (ThisContext.back().CallerName != ThatContext.back().CallerName) + return false; + // Compare leading context + return ThisContext.drop_back() == ThatContext.drop_back(); } - // Full context string including calling context and leaf function name - StringRef FullContext; - // Function name for the associated sample profile +private: + /// Mangled name of the function. StringRef Name; - // Calling context (leaf function excluded) for the associated sample profile - StringRef CallingContext; + // Full context including calling context and leaf function name + SampleContextFrames FullContext; // State of the associated sample profile uint32_t State; // Attribute of the associated sample profile uint32_t Attributes; }; +static inline hash_code hash_value(const SampleContext &arg) { + return arg.hasContext() ? hash_value(arg.getContextFrames()) + : hash_value(arg.getName()); +} + class FunctionSamples; class SampleProfileReaderItaniumRemapper; @@ -719,10 +834,9 @@ /// Optionally scale samples by \p Weight. sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) { sampleprof_error Result = sampleprof_error::success; - Name = Other.getName(); if (!GUIDToFuncNameMap) GUIDToFuncNameMap = Other.GUIDToFuncNameMap; - if (Context.getNameWithContext().empty()) + if (Context.getName().empty()) Context = Other.getContext(); if (FunctionHash == 0) { // Set the function hash code for the target profile. @@ -768,7 +882,7 @@ }; if (isDeclaration(SymbolMap.lookup(getFuncName()))) { // Add to the import list only when it's defined out of module. - S.insert(getGUID(Name)); + S.insert(getGUID(getName())); } // Import hot CallTargets, which may not be available in IR because full // profile annotation cannot be done until backend compilation in ThinLTO. @@ -785,18 +899,13 @@ } /// Set the name of the function. - void setName(StringRef FunctionName) { Name = FunctionName; } + void setName(StringRef FunctionName) { Context.setName(FunctionName); } /// Return the function name. - StringRef getName() const { return Name; } - - /// Return function name with context. - StringRef getNameWithContext() const { - return FunctionSamples::ProfileIsCS ? Context.getNameWithContext() : Name; - } + StringRef getName() const { return Context.getName(); } /// Return the original function name. - StringRef getFuncName() const { return getFuncName(Name); } + StringRef getFuncName() const { return getFuncName(getName()); } void setFunctionHash(uint64_t Hash) { FunctionHash = Hash; } @@ -923,9 +1032,6 @@ void findAllNames(DenseSet &NameSet) const; private: - /// Mangled name of the function. - StringRef Name; - /// CFG hash value for the function. uint64_t FunctionHash = 0; @@ -971,9 +1077,12 @@ raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS); -using NameFunctionSamples = std::pair; +using SampleProfileMap = + std::unordered_map; -void sortFuncProfiles(const StringMap &ProfileMap, +using NameFunctionSamples = std::pair; + +void sortFuncProfiles(const SampleProfileMap &ProfileMap, std::vector &SortedProfiles); /// Sort a LocationT->SampleT map by LocationT. @@ -1004,8 +1113,7 @@ /// sure ProfileMap's key is consistent with FunctionSample's name/context. class SampleContextTrimmer { public: - SampleContextTrimmer(StringMap &Profiles) - : ProfileMap(Profiles){}; + SampleContextTrimmer(SampleProfileMap &Profiles) : ProfileMap(Profiles){}; // Trim and merge cold context profile when requested. void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold, bool TrimColdContext, @@ -1015,7 +1123,7 @@ void canonicalizeContextProfiles(); private: - StringMap &ProfileMap; + SampleProfileMap &ProfileMap; }; /// ProfileSymbolList records the list of function symbols shown up @@ -1060,6 +1168,22 @@ }; } // end namespace sampleprof + +using namespace sampleprof; +// Provide DenseMapInfo for SampleContext. +template <> struct DenseMapInfo { + static inline SampleContext getEmptyKey() { return SampleContext(); } + + static inline SampleContext getTombstoneKey() { return SampleContext("@"); } + + static unsigned getHashValue(const SampleContext &Val) { + return Val.getHashCode(); + } + + static bool isEqual(const SampleContext &LHS, const SampleContext &RHS) { + return LHS == RHS; + } +}; } // end namespace llvm #endif // LLVM_PROFILEDATA_SAMPLEPROF_H diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -242,6 +242,7 @@ #include "llvm/Support/SymbolRemappingReader.h" #include #include +#include #include #include #include @@ -381,8 +382,8 @@ /// The implementaion to read sample profiles from the associated file. virtual std::error_code readImpl() = 0; - /// Print the profile for \p FName on stream \p OS. - void dumpFunctionProfile(StringRef FName, raw_ostream &OS = dbgs()); + /// Print the profile for \p FContext on stream \p OS. + void dumpFunctionProfile(SampleContext FContext, raw_ostream &OS = dbgs()); /// Collect functions with definitions in Module M. For reader which /// support loading function profiles on demand, return true when the @@ -437,7 +438,7 @@ } /// Return all the profiles. - StringMap &getProfiles() { return Profiles; } + SampleProfileMap &getProfiles() { return Profiles; } /// Report a parse error message. void reportError(int64_t LineNumber, const Twine &Msg) const { @@ -503,7 +504,7 @@ /// The profile of every function executed at runtime is collected /// in the structure FunctionSamples. This maps function objects /// to their corresponding profiles. - StringMap Profiles; + SampleProfileMap Profiles; /// LLVM context used to emit diagnostics. LLVMContext &Ctx; @@ -567,6 +568,11 @@ /// Return true if \p Buffer is in the format supported by this class. static bool hasFormat(const MemoryBuffer &Buffer); + +private: + /// CSNameTable is used to save full context vectors. This serves as an + /// underlying immutable buffer for all clients. + std::list CSNameTable; }; class SampleProfileReaderBinary : public SampleProfileReader { @@ -638,6 +644,7 @@ /// Read a string indirectly via the name table. virtual ErrorOr readStringFromTable(); + virtual ErrorOr readSampleContextFromTable(); private: std::error_code readSummaryEntry(std::vector &Entries); @@ -695,6 +702,7 @@ std::error_code readFuncProfiles(); std::error_code readMD5NameTable(); std::error_code readNameTableSec(bool IsMD5); + std::error_code readCSNameTableSec(); std::error_code readProfileSymbolList(); virtual std::error_code readHeader() override; @@ -704,12 +712,14 @@ // placeholder for subclasses to dispatch their own section readers. virtual std::error_code readCustomSection(const SecHdrTableEntry &Entry) = 0; virtual ErrorOr readStringFromTable() override; + virtual ErrorOr readSampleContextFromTable() override; + ErrorOr readContextFromTable(); std::unique_ptr ProfSymList; - /// The table mapping from function name to the offset of its FunctionSample - /// towards file start. - DenseMap FuncOffsetTable; + /// The table mapping from function context to the offset of its + /// FunctionSample towards file start. + DenseMap FuncOffsetTable; /// The set containing the functions to use when compiling a module. DenseSet FuncsToUse; @@ -728,6 +738,10 @@ /// the lifetime of MD5StringBuf is not shorter than that of NameTable. std::unique_ptr> MD5StringBuf; + /// CSNameTable is used to save full context vectors. This serves as an + /// underlying immutable buffer for all clients. + std::unique_ptr> CSNameTable; + /// If SkipFlatProf is true, skip the sections with /// SecFlagFlat flag. bool SkipFlatProf = false; diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h --- a/llvm/include/llvm/ProfileData/SampleProfWriter.h +++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h @@ -52,7 +52,7 @@ /// Write all the sample profiles in the given map of samples. /// /// \returns status code of the file update operation. - virtual std::error_code write(const StringMap &ProfileMap); + virtual std::error_code write(const SampleProfileMap &ProfileMap); raw_ostream &getOutputStream() { return *OutputStream; } @@ -78,12 +78,10 @@ : OutputStream(std::move(OS)) {} /// Write a file header for the profile file. - virtual std::error_code - writeHeader(const StringMap &ProfileMap) = 0; + virtual std::error_code writeHeader(const SampleProfileMap &ProfileMap) = 0; // Write function profiles to the profile file. - virtual std::error_code - writeFuncProfiles(const StringMap &ProfileMap); + virtual std::error_code writeFuncProfiles(const SampleProfileMap &ProfileMap); /// Output stream where to emit the profile to. std::unique_ptr OutputStream; @@ -92,7 +90,7 @@ std::unique_ptr Summary; /// Compute summary for this profile. - void computeSummary(const StringMap &ProfileMap); + void computeSummary(const SampleProfileMap &ProfileMap); /// Profile format. SampleProfileFormat Format = SPF_None; @@ -107,8 +105,7 @@ SampleProfileWriterText(std::unique_ptr &OS) : SampleProfileWriter(OS), Indent(0) {} - std::error_code - writeHeader(const StringMap &ProfileMap) override { + std::error_code writeHeader(const SampleProfileMap &ProfileMap) override { return sampleprof_error::success; } @@ -132,19 +129,22 @@ virtual std::error_code writeSample(const FunctionSamples &S) override; protected: + virtual MapVector &getNameTable() { return NameTable; } virtual std::error_code writeMagicIdent(SampleProfileFormat Format); virtual std::error_code writeNameTable(); virtual std::error_code - writeHeader(const StringMap &ProfileMap) override; + writeHeader(const SampleProfileMap &ProfileMap) override; std::error_code writeSummary(); - std::error_code writeNameIdx(StringRef FName, bool IsContextName = false); + virtual std::error_code writeContextIdx(const SampleContext &Context); + std::error_code writeNameIdx(StringRef FName); std::error_code writeBody(const FunctionSamples &S); - inline void stablizeNameTable(std::set &V); + inline void stablizeNameTable(MapVector &NameTable, + std::set &V); MapVector NameTable; - std::unordered_set BracketedContextStr; - void addName(StringRef FName, bool IsContextName = false); + void addName(StringRef FName); + virtual void addContext(const SampleContext &Context); void addNames(const FunctionSamples &S); private: @@ -168,6 +168,7 @@ // DefaultLayout SmallVector({{SecProfSummary, 0, 0, 0, 0}, {SecNameTable, 0, 0, 0, 0}, + {SecCSNameTable, 0, 0, 0, 0}, {SecFuncOffsetTable, 0, 0, 0, 0}, {SecLBRProfile, 0, 0, 0, 0}, {SecProfileSymbolList, 0, 0, 0, 0}, @@ -190,8 +191,7 @@ class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary { using SampleProfileWriterBinary::SampleProfileWriterBinary; public: - virtual std::error_code - write(const StringMap &ProfileMap) override; + virtual std::error_code write(const SampleProfileMap &ProfileMap) override; virtual void setToCompressAllSections() override; void setToCompressSection(SecType Type); @@ -246,29 +246,32 @@ addSecFlag(SectionHdrLayout[SectionIdx], Flag); } + virtual void addContext(const SampleContext &Context) override; + // placeholder for subclasses to dispatch their own section writers. virtual std::error_code writeCustomSection(SecType Type) = 0; // Verify the SecLayout is supported by the format. virtual void verifySecLayout(SectionLayout SL) = 0; // specify the order to write sections. - virtual std::error_code - writeSections(const StringMap &ProfileMap) = 0; + virtual std::error_code writeSections(const SampleProfileMap &ProfileMap) = 0; // Dispatch section writer for each section. \p LayoutIdx is the sequence // number indicating where the section is located in SectionHdrLayout. - virtual std::error_code - writeOneSection(SecType Type, uint32_t LayoutIdx, - const StringMap &ProfileMap); + virtual std::error_code writeOneSection(SecType Type, uint32_t LayoutIdx, + const SampleProfileMap &ProfileMap); // Helper function to write name table. virtual std::error_code writeNameTable() override; + virtual std::error_code + writeContextIdx(const SampleContext &Context) override; + std::error_code writeCSNameIdx(const SampleContext &Context); + std::error_code writeCSNameTableSection(); - std::error_code writeFuncMetadata(const StringMap &Profiles); + std::error_code writeFuncMetadata(const SampleProfileMap &Profiles); // Functions to write various kinds of sections. - std::error_code - writeNameTableSection(const StringMap &ProfileMap); + std::error_code writeNameTableSection(const SampleProfileMap &ProfileMap); std::error_code writeFuncOffsetTable(); std::error_code writeProfileSymbolListSection(); @@ -289,7 +292,7 @@ void allocSecHdrTable(); std::error_code writeSecHdrTable(); virtual std::error_code - writeHeader(const StringMap &ProfileMap) override; + writeHeader(const SampleProfileMap &ProfileMap) override; std::error_code compressAndOutput(); // We will swap the raw_ostream held by LocalBufStream and that @@ -312,12 +315,16 @@ // be read. std::vector SecHdrTable; - // FuncOffsetTable maps function name to its profile offset in SecLBRProfile - // section. It is used to load function profile on demand. - MapVector FuncOffsetTable; + // FuncOffsetTable maps function context to its profile offset in + // SecLBRProfile section. It is used to load function profile on demand. + MapVector FuncOffsetTable; // Whether to use MD5 to represent string. bool UseMD5 = false; + /// CSNameTable maps function context to its offset in SecCSNameTable section. + /// The offset will be used everywhere where the context is referenced. + MapVector CSNameTable; + ProfileSymbolList *ProfSymList = nullptr; }; @@ -327,13 +334,11 @@ : SampleProfileWriterExtBinaryBase(OS) {} private: - std::error_code - writeDefaultLayout(const StringMap &ProfileMap); - std::error_code - writeCtxSplitLayout(const StringMap &ProfileMap); + std::error_code writeDefaultLayout(const SampleProfileMap &ProfileMap); + std::error_code writeCtxSplitLayout(const SampleProfileMap &ProfileMap); virtual std::error_code - writeSections(const StringMap &ProfileMap) override; + writeSections(const SampleProfileMap &ProfileMap) override; virtual std::error_code writeCustomSection(SecType Type) override { return sampleprof_error::success; @@ -380,8 +385,7 @@ public: virtual std::error_code writeSample(const FunctionSamples &S) override; - virtual std::error_code - write(const StringMap &ProfileMap) override; + virtual std::error_code write(const SampleProfileMap &ProfileMap) override; protected: /// The table mapping from function name to the offset of its FunctionSample @@ -392,7 +396,7 @@ uint64_t TableOffset; virtual std::error_code writeNameTable() override; virtual std::error_code - writeHeader(const StringMap &ProfileMap) override; + writeHeader(const SampleProfileMap &ProfileMap) override; std::error_code writeFuncOffsetTable(); }; diff --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h --- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h +++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h @@ -42,7 +42,7 @@ using iterator = std::set::iterator; // Constructor for non-CS profile. - ProfiledCallGraph(StringMap &ProfileMap) { + ProfiledCallGraph(SampleProfileMap &ProfileMap) { assert(!FunctionSamples::ProfileIsCS && "CS profile is not handled here"); for (const auto &Samples : ProfileMap) { addProfiledCalls(Samples.second); diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h --- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h +++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h @@ -50,7 +50,7 @@ ContextTrieNode &moveToChildContext(const LineLocation &CallSite, ContextTrieNode &&NodeToMove, - StringRef ContextStrToRemove, + uint32_t ContextFramesToRemove, bool DeleteNode = true); void removeChildContext(const LineLocation &CallSite, StringRef ChildName); std::map &getAllChildContext(); @@ -96,9 +96,21 @@ // calling context and the context is identified by path from root to the node. class SampleContextTracker { public: - using ContextSamplesTy = SmallVector; - - SampleContextTracker(StringMap &Profiles); + struct ProfileComparer { + bool operator()(FunctionSamples *A, FunctionSamples *B) const { + // Sort function profiles by the number of total samples and their + // contexts. + if (A->getTotalSamples() == B->getTotalSamples()) + return A->getContext() < B->getContext(); + return A->getTotalSamples() > B->getTotalSamples(); + } + }; + + // Keep profiles of a function sorted so that they will be processed/promoted + // deterministically. + using ContextSamplesTy = std::set; + + SampleContextTracker(SampleProfileMap &Profiles); // Query context profile for a specific callee with given name at a given // call-site. The full context is identified by location of call instruction. FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst, @@ -142,10 +154,11 @@ ContextTrieNode &addTopLevelContextNode(StringRef FName); ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &NodeToPromo); void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode, - StringRef ContextStrToRemove); - ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &FromNode, - ContextTrieNode &ToNodeParent, - StringRef ContextStrToRemove); + uint32_t ContextFramesToRemove); + ContextTrieNode & + promoteMergeContextSamplesTree(ContextTrieNode &FromNode, + ContextTrieNode &ToNodeParent, + uint32_t ContextFramesToRemove); // Map from function name to context profiles (excluding base profile) StringMap FuncToCtxtProfiles; diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp --- a/llvm/lib/MC/MCPseudoProbe.cpp +++ b/llvm/lib/MC/MCPseudoProbe.cpp @@ -221,24 +221,17 @@ } void MCDecodedPseudoProbe::getInlineContext( - SmallVectorImpl &ContextStack, - const GUIDProbeFunctionMap &GUID2FuncMAP, bool ShowName) const { + SmallVectorImpl &ContextStack, + const GUIDProbeFunctionMap &GUID2FuncMAP) const { uint32_t Begin = ContextStack.size(); MCDecodedPseudoProbeInlineTree *Cur = InlineTree; // It will add the string of each node's inline site during iteration. // Note that it won't include the probe's belonging function(leaf location) while (Cur->hasInlineSite()) { - std::string ContextStr; - if (ShowName) { - StringRef FuncName = - getProbeFNameForGUID(GUID2FuncMAP, std::get<0>(Cur->ISite)); - ContextStr += FuncName.str(); - } else { - ContextStr += Twine(std::get<0>(Cur->ISite)).str(); - } - ContextStr += ":"; - ContextStr += Twine(std::get<1>(Cur->ISite)).str(); - ContextStack.emplace_back(ContextStr); + StringRef FuncName = + getProbeFNameForGUID(GUID2FuncMAP, std::get<0>(Cur->ISite)); + ContextStack.emplace_back( + MCPseduoProbeFrameLocation(FuncName, std::get<1>(Cur->ISite))); Cur = static_cast(Cur->Parent); } // Make the ContextStack in caller-callee order @@ -246,14 +239,14 @@ } std::string MCDecodedPseudoProbe::getInlineContextStr( - const GUIDProbeFunctionMap &GUID2FuncMAP, bool ShowName) const { + const GUIDProbeFunctionMap &GUID2FuncMAP) const { std::ostringstream OContextStr; - SmallVector ContextStack; - getInlineContext(ContextStack, GUID2FuncMAP, ShowName); - for (auto &CxtStr : ContextStack) { + SmallVector ContextStack; + getInlineContext(ContextStack, GUID2FuncMAP); + for (auto &Cxt : ContextStack) { if (OContextStr.str().size()) OContextStr << " @ "; - OContextStr << CxtStr; + OContextStr << Cxt.first.str() << ":" << Cxt.second; } return OContextStr.str(); } @@ -273,7 +266,7 @@ } OS << "Index: " << Index << " "; OS << "Type: " << PseudoProbeTypeStr[static_cast(Type)] << " "; - std::string InlineContextStr = getInlineContextStr(GUID2FuncMAP, ShowName); + std::string InlineContextStr = getInlineContextStr(GUID2FuncMAP); if (InlineContextStr.size()) { OS << "Inlined: @ "; OS << InlineContextStr; @@ -552,15 +545,16 @@ void MCPseudoProbeDecoder::getInlineContextForProbe( const MCDecodedPseudoProbe *Probe, - SmallVectorImpl &InlineContextStack, bool IncludeLeaf) const { - Probe->getInlineContext(InlineContextStack, GUID2FuncDescMap, true); + SmallVectorImpl &InlineContextStack, + bool IncludeLeaf) const { + Probe->getInlineContext(InlineContextStack, GUID2FuncDescMap); if (!IncludeLeaf) return; // Note that the context from probe doesn't include leaf frame, // hence we need to retrieve and prepend leaf if requested. const auto *FuncDesc = getFuncDescForGUID(Probe->getGuid()); - InlineContextStack.emplace_back(FuncDesc->FuncName + ":" + - Twine(Probe->getIndex()).str()); + InlineContextStack.emplace_back( + MCPseduoProbeFrameLocation(FuncDesc->FuncName, Probe->getIndex())); } const MCPseudoProbeFuncDesc *MCPseudoProbeDecoder::getInlinerDescForProbe( diff --git a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp --- a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp +++ b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp @@ -181,17 +181,17 @@ std::unique_ptr SampleProfileSummaryBuilder::computeSummaryForProfiles( - const StringMap &Profiles) { + const SampleProfileMap &Profiles) { assert(NumFunctions == 0 && "This can only be called on an empty summary builder"); - StringMap ContextLessProfiles; - const StringMap *ProfilesToUse = &Profiles; + sampleprof::SampleProfileMap ContextLessProfiles; + const sampleprof::SampleProfileMap *ProfilesToUse = &Profiles; // For CSSPGO, context-sensitive profile effectively split a function profile // into many copies each representing the CFG profile of a particular calling // context. That makes the count distribution looks more flat as we now have // more function profiles each with lower counts, which in turn leads to lower // hot thresholds. To compensate for that, by defauly we merge context - // profiles before coumputing profile summary. + // profiles before computing profile summary. if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS && !UseContextLessSummary.getNumOccurrences())) { for (const auto &I : Profiles) { diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -199,18 +199,16 @@ } void sampleprof::sortFuncProfiles( - const StringMap &ProfileMap, + const SampleProfileMap &ProfileMap, std::vector &SortedProfiles) { for (const auto &I : ProfileMap) { - assert(I.getKey() == I.second.getNameWithContext() && - "Inconsistent profile map"); - SortedProfiles.push_back( - std::make_pair(I.second.getNameWithContext(), &I.second)); + assert(I.first == I.second.getContext() && "Inconsistent profile map"); + SortedProfiles.push_back(std::make_pair(I.second.getContext(), &I.second)); } llvm::stable_sort(SortedProfiles, [](const NameFunctionSamples &A, const NameFunctionSamples &B) { if (A.second->getTotalSamples() == B.second->getTotalSamples()) - return A.first > B.first; + return A.first < B.first; return A.second->getTotalSamples() > B.second->getTotalSamples(); }); } @@ -262,7 +260,7 @@ } void FunctionSamples::findAllNames(DenseSet &NameSet) const { - NameSet.insert(Name); + NameSet.insert(getName()); for (const auto &BS : BodySamples) for (const auto &TS : BS.second.getCallTargets()) NameSet.insert(TS.getKey()); @@ -343,23 +341,23 @@ // Filter the cold profiles from ProfileMap and move them into a tmp // container - std::vector> ColdProfiles; + std::vector> ColdProfiles; for (const auto &I : ProfileMap) { const FunctionSamples &FunctionProfile = I.second; if (FunctionProfile.getTotalSamples() >= ColdCountThreshold) continue; - ColdProfiles.emplace_back(I.getKey(), &I.second); + ColdProfiles.emplace_back(I.first, &I.second); } // Remove the cold profile from ProfileMap and merge them into // MergedProfileMap by the last K frames of context - StringMap MergedProfileMap; + SampleProfileMap MergedProfileMap; for (const auto &I : ColdProfiles) { if (MergeColdContext) { - auto Ret = MergedProfileMap.try_emplace( - I.second->getContext().getContextWithLastKFrames( - ColdContextFrameLength), - FunctionSamples()); + auto MergedContext = I.second->getContext().getContextFrames(); + if (ColdContextFrameLength < MergedContext.size()) + MergedContext = MergedContext.take_back(ColdContextFrameLength); + auto Ret = MergedProfileMap.emplace(MergedContext, FunctionSamples()); FunctionSamples &MergedProfile = Ret.first->second; MergedProfile.merge(*I.second); } @@ -370,16 +368,15 @@ for (const auto &I : MergedProfileMap) { // Filter the cold merged profile if (TrimColdContext && I.second.getTotalSamples() < ColdCountThreshold && - ProfileMap.find(I.getKey()) == ProfileMap.end()) + ProfileMap.find(I.first) == ProfileMap.end()) continue; // Merge the profile if the original profile exists, otherwise just insert // as a new profile - auto Ret = ProfileMap.try_emplace(I.getKey(), FunctionSamples()); + auto Ret = ProfileMap.emplace(I.first, FunctionSamples()); if (Ret.second) { - SampleContext FContext(Ret.first->first(), RawContext); + SampleContext FContext(Ret.first->first, RawContext); FunctionSamples &FProfile = Ret.first->second; FProfile.setContext(FContext); - FProfile.setName(FContext.getNameWithoutContext()); } FunctionSamples &OrigProfile = Ret.first->second; OrigProfile.merge(I.second); @@ -387,12 +384,12 @@ } void SampleContextTrimmer::canonicalizeContextProfiles() { - std::vector ProfilesToBeRemoved; - StringMap ProfilesToBeAdded; + std::vector ProfilesToBeRemoved; + SampleProfileMap ProfilesToBeAdded; for (auto &I : ProfileMap) { FunctionSamples &FProfile = I.second; - StringRef ContextStr = FProfile.getNameWithContext(); - if (I.first() == ContextStr) + SampleContext &Context = FProfile.getContext(); + if (I.first == Context) continue; // Use the context string from FunctionSamples to update the keys of @@ -407,10 +404,10 @@ // with different profiles) from the map can cause a conflict if they are // not handled in a right order. This can be solved by just caching the // profiles to be added. - auto Ret = ProfilesToBeAdded.try_emplace(ContextStr, FProfile); + auto Ret = ProfilesToBeAdded.emplace(Context, FProfile); (void)Ret; assert(Ret.second && "Context conflict during canonicalization"); - ProfilesToBeRemoved.push_back(I.first()); + ProfilesToBeRemoved.push_back(I.first); } for (auto &I : ProfilesToBeRemoved) { @@ -418,7 +415,7 @@ } for (auto &I : ProfilesToBeAdded) { - ProfileMap.try_emplace(I.first(), I.second); + ProfileMap.emplace(I.first, I.second); } } diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -59,9 +59,9 @@ /// /// \param FName Name of the function to print. /// \param OS Stream to emit the output to. -void SampleProfileReader::dumpFunctionProfile(StringRef FName, +void SampleProfileReader::dumpFunctionProfile(SampleContext FContext, raw_ostream &OS) { - OS << "Function: " << FName << ": " << Profiles[FName]; + OS << "Function: " << FContext.toString() << ": " << Profiles[FContext]; } /// Dump all the function profiles found on stream \p OS. @@ -276,12 +276,11 @@ return sampleprof_error::malformed; } SeenMetadata = false; - SampleContext FContext(FName); + SampleContext FContext(FName, CSNameTable); if (FContext.hasContext()) ++CSProfileCount; Profiles[FContext] = FunctionSamples(); FunctionSamples &FProfile = Profiles[FContext]; - FProfile.setName(FContext.getNameWithoutContext()); FProfile.setContext(FContext); MergeResult(Result, FProfile.addTotalSamples(NumSamples)); MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples)); @@ -453,6 +452,13 @@ return NameTable[*Idx]; } +ErrorOr SampleProfileReaderBinary::readSampleContextFromTable() { + auto FName(readStringFromTable()); + if (std::error_code EC = FName.getError()) + return EC; + return SampleContext(*FName); +} + ErrorOr SampleProfileReaderExtBinaryBase::readStringFromTable() { if (!FixedLengthMD5) return SampleProfileReaderBinary::readStringFromTable(); @@ -579,18 +585,16 @@ if (std::error_code EC = NumHeadSamples.getError()) return EC; - auto FName(readStringFromTable()); - if (std::error_code EC = FName.getError()) + ErrorOr FContext(readSampleContextFromTable()); + if (std::error_code EC = FContext.getError()) return EC; - SampleContext FContext(*FName); - Profiles[FContext] = FunctionSamples(); - FunctionSamples &FProfile = Profiles[FContext]; - FProfile.setName(FContext.getNameWithoutContext()); - FProfile.setContext(FContext); + Profiles[*FContext] = FunctionSamples(); + FunctionSamples &FProfile = Profiles[*FContext]; + FProfile.setContext(*FContext); FProfile.addHeadSamples(*NumHeadSamples); - if (FContext.hasContext()) + if (FContext->hasContext()) CSProfileCount++; if (std::error_code EC = readProfile(FProfile)) @@ -609,6 +613,31 @@ return sampleprof_error::success; } +ErrorOr +SampleProfileReaderExtBinaryBase::readContextFromTable() { + auto ContextIdx = readNumber(); + if (std::error_code EC = ContextIdx.getError()) + return EC; + if (*ContextIdx >= CSNameTable->size()) + return sampleprof_error::truncated_name_table; + return (*CSNameTable)[*ContextIdx]; +} + +ErrorOr +SampleProfileReaderExtBinaryBase::readSampleContextFromTable() { + if (ProfileIsCS) { + auto FContext(readContextFromTable()); + if (std::error_code EC = FContext.getError()) + return EC; + return SampleContext(*FContext); + } else { + auto FName(readStringFromTable()); + if (std::error_code EC = FName.getError()) + return EC; + return SampleContext(*FName); + } +} + std::error_code SampleProfileReaderExtBinaryBase::readOneSection( const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) { Data = Start; @@ -636,6 +665,11 @@ return EC; break; } + case SecCSNameTable: { + if (std::error_code EC = readCSNameTableSec()) + return EC; + break; + } case SecLBRProfile: if (std::error_code EC = readFuncProfiles()) return EC; @@ -687,7 +721,7 @@ FuncOffsetTable.reserve(*Size); for (uint32_t I = 0; I < *Size; ++I) { - auto FName(readStringFromTable()); + auto FName(readSampleContextFromTable()); if (std::error_code EC = FName.getError()) return EC; @@ -736,27 +770,20 @@ if (std::error_code EC = readFuncProfile(FuncProfileAddr)) return EC; } - } else if (FunctionSamples::ProfileIsCS) { + } else if (ProfileIsCS) { // Compute the ordered set of names, so we can // get all context profiles under a subtree by // iterating through the ordered names. - struct Comparer { - // Ignore the closing ']' when ordering context - bool operator()(const StringRef &L, const StringRef &R) const { - return L.substr(0, L.size() - 1) < R.substr(0, R.size() - 1); - } - }; - std::set OrderedNames; + std::set OrderedContexts; for (auto Name : FuncOffsetTable) { - OrderedNames.insert(Name.first); + OrderedContexts.insert(Name.first); } // For each function in current module, load all // context profiles for the function. for (auto NameOffset : FuncOffsetTable) { - StringRef ContextName = NameOffset.first; - SampleContext FContext(ContextName); - auto FuncName = FContext.getNameWithoutContext(); + SampleContext FContext = NameOffset.first; + auto FuncName = FContext.getName(); if (!FuncsToUse.count(FuncName) && (!Remapper || !Remapper->exist(FuncName))) continue; @@ -764,22 +791,21 @@ // For each context profile we need, try to load // all context profile in the subtree. This can // help profile guided importing for ThinLTO. - auto It = OrderedNames.find(ContextName); - while (It != OrderedNames.end() && - It->startswith(ContextName.substr(0, ContextName.size() - 1))) { + auto It = OrderedContexts.find(FContext); + while (It != OrderedContexts.end() && FContext.IsPrefixOf(*It)) { const uint8_t *FuncProfileAddr = Start + FuncOffsetTable[*It]; assert(FuncProfileAddr < End && "out of LBRProfile section"); if (std::error_code EC = readFuncProfile(FuncProfileAddr)) return EC; // Remove loaded context profile so we won't // load it repeatedly. - It = OrderedNames.erase(It); + It = OrderedContexts.erase(It); } } } else { for (auto NameOffset : FuncOffsetTable) { SampleContext FContext(NameOffset.first); - auto FuncName = FContext.getNameWithoutContext(); + auto FuncName = FContext.getName(); if (!FuncsToUse.count(FuncName) && (!Remapper || !Remapper->exist(FuncName))) continue; @@ -988,22 +1014,62 @@ return SampleProfileReaderBinary::readNameTable(); } +// Read in the CS name table section, which basically contains a list of context +// vectors. Each element of a context vector, aka a frame, refers to the +// underlying raw function names that are stored in the name table, as well as +// a callsite identifier that only makes sense for non-leaf frames. +std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() { + auto Size = readNumber(); + if (std::error_code EC = Size.getError()) + return EC; + + std::vector *PNameVec = + new std::vector(); + PNameVec->reserve(*Size); + for (uint32_t I = 0; I < *Size; ++I) { + PNameVec->emplace_back(SampleContextFrameVector()); + auto ContextSize = readNumber(); + if (std::error_code EC = ContextSize.getError()) + return EC; + for (uint32_t J = 0; J < *ContextSize; ++J) { + auto FName(readStringFromTable()); + if (std::error_code EC = FName.getError()) + return EC; + auto LineOffset = readNumber(); + if (std::error_code EC = LineOffset.getError()) + return EC; + + if (!isOffsetLegal(*LineOffset)) + return std::error_code(); + + auto Discriminator = readNumber(); + if (std::error_code EC = Discriminator.getError()) + return EC; + + PNameVec->back().emplace_back( + FName.get(), LineLocation(LineOffset.get(), Discriminator.get())); + } + } + + // From this point the underlying object of CSNameTable should be immutable. + CSNameTable.reset(PNameVec); + return sampleprof_error::success; +} + std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) { while (Data < End) { - auto FName(readStringFromTable()); - if (std::error_code EC = FName.getError()) + auto FContext(readSampleContextFromTable()); + if (std::error_code EC = FContext.getError()) return EC; - SampleContext FContext(*FName); - bool ProfileInMap = Profiles.count(FContext); - + bool ProfileInMap = Profiles.count(*FContext); if (ProfileIsProbeBased) { auto Checksum = readNumber(); if (std::error_code EC = Checksum.getError()) return EC; if (ProfileInMap) - Profiles[FContext].setFunctionHash(*Checksum); + Profiles[*FContext].setFunctionHash(*Checksum); } if (ProfileHasAttribute) { @@ -1011,7 +1077,7 @@ if (std::error_code EC = Attributes.getError()) return EC; if (ProfileInMap) - Profiles[FContext].getContext().setAllAttributes(*Attributes); + Profiles[*FContext].getContext().setAllAttributes(*Attributes); } } diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -41,8 +41,8 @@ using namespace llvm; using namespace sampleprof; -std::error_code SampleProfileWriter::writeFuncProfiles( - const StringMap &ProfileMap) { +std::error_code +SampleProfileWriter::writeFuncProfiles(const SampleProfileMap &ProfileMap) { std::vector V; sortFuncProfiles(ProfileMap, V); for (const auto &I : V) { @@ -52,8 +52,7 @@ return sampleprof_error::success; } -std::error_code -SampleProfileWriter::write(const StringMap &ProfileMap) { +std::error_code SampleProfileWriter::write(const SampleProfileMap &ProfileMap) { if (std::error_code EC = writeHeader(ProfileMap)) return EC; @@ -117,8 +116,8 @@ return sampleprof_error::success; } -std::error_code SampleProfileWriterExtBinaryBase::write( - const StringMap &ProfileMap) { +std::error_code +SampleProfileWriterExtBinaryBase::write(const SampleProfileMap &ProfileMap) { if (std::error_code EC = writeHeader(ProfileMap)) return EC; @@ -133,11 +132,28 @@ return sampleprof_error::success; } +std::error_code SampleProfileWriterExtBinaryBase::writeContextIdx( + const SampleContext &Context) { + if (Context.hasContext()) + return writeCSNameIdx(Context); + else + return SampleProfileWriterBinary::writeNameIdx(Context.getName()); +} + +std::error_code +SampleProfileWriterExtBinaryBase::writeCSNameIdx(const SampleContext &Context) { + const auto &Ret = CSNameTable.find(Context); + if (Ret == CSNameTable.end()) + return sampleprof_error::truncated_name_table; + encodeULEB128(Ret->second, *OutputStream); + return sampleprof_error::success; +} + std::error_code SampleProfileWriterExtBinaryBase::writeSample(const FunctionSamples &S) { uint64_t Offset = OutputStream->tell(); - StringRef Name = S.getNameWithContext(); - FuncOffsetTable[Name] = Offset - SecLBRProfileStart; + auto &Context = S.getContext(); + FuncOffsetTable[Context] = Offset - SecLBRProfileStart; encodeULEB128(S.getHeadSamples(), *OutputStream); return writeBody(S); } @@ -150,8 +166,7 @@ // Write out FuncOffsetTable. for (auto Entry : FuncOffsetTable) { - if (std::error_code EC = - writeNameIdx(Entry.first, FunctionSamples::ProfileIsCS)) + if (std::error_code EC = writeContextIdx(Entry.first)) return EC; encodeULEB128(Entry.second, OS); } @@ -160,13 +175,12 @@ } std::error_code SampleProfileWriterExtBinaryBase::writeFuncMetadata( - const StringMap &Profiles) { + const SampleProfileMap &Profiles) { if (!FunctionSamples::ProfileIsProbeBased && !FunctionSamples::ProfileIsCS) return sampleprof_error::success; auto &OS = *OutputStream; for (const auto &Entry : Profiles) { - if (std::error_code EC = writeNameIdx(Entry.second.getNameWithContext(), - FunctionSamples::ProfileIsCS)) + if (std::error_code EC = writeContextIdx(Entry.second.getContext())) return EC; if (FunctionSamples::ProfileIsProbeBased) encodeULEB128(Entry.second.getFunctionHash(), OS); @@ -182,7 +196,7 @@ auto &OS = *OutputStream; std::set V; - stablizeNameTable(V); + stablizeNameTable(NameTable, V); // Write out the MD5 name table. We wrote unencoded MD5 so reader can // retrieve the name using the name index without having to read the @@ -195,11 +209,10 @@ } std::error_code SampleProfileWriterExtBinaryBase::writeNameTableSection( - const StringMap &ProfileMap) { + const SampleProfileMap &ProfileMap) { for (const auto &I : ProfileMap) { - assert(I.first() == I.second.getNameWithContext() && - "Inconsistent profile map"); - addName(I.second.getNameWithContext(), FunctionSamples::ProfileIsCS); + assert(I.first == I.second.getContext() && "Inconsistent profile map"); + addContext(I.second.getContext()); addNames(I.second); } @@ -218,6 +231,34 @@ return sampleprof_error::success; } +std::error_code SampleProfileWriterExtBinaryBase::writeCSNameTableSection() { + // Sort the names to make CSNameTable deterministic. + std::set OrderedContexts; + for (const auto &I : CSNameTable) + OrderedContexts.insert(I.first); + assert(OrderedContexts.size() == CSNameTable.size() && + "Unmatched ordered and unordered contexts"); + uint64_t I = 0; + for (auto &Context : OrderedContexts) + CSNameTable[Context] = I++; + + auto &OS = *OutputStream; + encodeULEB128(OrderedContexts.size(), OS); + support::endian::Writer Writer(OS, support::little); + for (auto Context : OrderedContexts) { + auto Frames = Context.getContextFrames(); + encodeULEB128(Frames.size(), OS); + for (auto &Callsite : Frames) { + if (std::error_code EC = writeNameIdx(Callsite.CallerName)) + return EC; + encodeULEB128(Callsite.Callsite.LineOffset, OS); + encodeULEB128(Callsite.Callsite.Discriminator, OS); + } + } + + return sampleprof_error::success; +} + std::error_code SampleProfileWriterExtBinaryBase::writeProfileSymbolListSection() { if (ProfSymList && ProfSymList->size() > 0) @@ -228,8 +269,7 @@ } std::error_code SampleProfileWriterExtBinaryBase::writeOneSection( - SecType Type, uint32_t LayoutIdx, - const StringMap &ProfileMap) { + SecType Type, uint32_t LayoutIdx, const SampleProfileMap &ProfileMap) { // The setting of SecFlagCompress should happen before markSectionStart. if (Type == SecProfileSymbolList && ProfSymList && ProfSymList->toCompress()) setToCompressSection(SecProfileSymbolList); @@ -253,6 +293,10 @@ if (auto EC = writeNameTableSection(ProfileMap)) return EC; break; + case SecCSNameTable: + if (auto EC = writeCSNameTableSection()) + return EC; + break; case SecLBRProfile: SecLBRProfileStart = OutputStream->tell(); if (std::error_code EC = writeFuncProfiles(ProfileMap)) @@ -281,7 +325,7 @@ } std::error_code SampleProfileWriterExtBinary::writeDefaultLayout( - const StringMap &ProfileMap) { + const SampleProfileMap &ProfileMap) { // The const indices passed to writeOneSection below are specifying the // positions of the sections in SectionHdrLayout. Look at // initSectionHdrLayout to find out where each section is located in @@ -290,32 +334,33 @@ return EC; if (auto EC = writeOneSection(SecNameTable, 1, ProfileMap)) return EC; - if (auto EC = writeOneSection(SecLBRProfile, 3, ProfileMap)) + if (auto EC = writeOneSection(SecCSNameTable, 2, ProfileMap)) return EC; - if (auto EC = writeOneSection(SecProfileSymbolList, 4, ProfileMap)) + if (auto EC = writeOneSection(SecLBRProfile, 4, ProfileMap)) return EC; - if (auto EC = writeOneSection(SecFuncOffsetTable, 2, ProfileMap)) + if (auto EC = writeOneSection(SecProfileSymbolList, 5, ProfileMap)) return EC; - if (auto EC = writeOneSection(SecFuncMetadata, 5, ProfileMap)) + if (auto EC = writeOneSection(SecFuncOffsetTable, 3, ProfileMap)) + return EC; + if (auto EC = writeOneSection(SecFuncMetadata, 6, ProfileMap)) return EC; return sampleprof_error::success; } -static void -splitProfileMapToTwo(const StringMap &ProfileMap, - StringMap &ContextProfileMap, - StringMap &NoContextProfileMap) { +static void splitProfileMapToTwo(const SampleProfileMap &ProfileMap, + SampleProfileMap &ContextProfileMap, + SampleProfileMap &NoContextProfileMap) { for (const auto &I : ProfileMap) { if (I.second.getCallsiteSamples().size()) - ContextProfileMap.insert({I.first(), I.second}); + ContextProfileMap.insert({I.first, I.second}); else - NoContextProfileMap.insert({I.first(), I.second}); + NoContextProfileMap.insert({I.first, I.second}); } } std::error_code SampleProfileWriterExtBinary::writeCtxSplitLayout( - const StringMap &ProfileMap) { - StringMap ContextProfileMap, NoContextProfileMap; + const SampleProfileMap &ProfileMap) { + SampleProfileMap ContextProfileMap, NoContextProfileMap; splitProfileMapToTwo(ProfileMap, ContextProfileMap, NoContextProfileMap); if (auto EC = writeOneSection(SecProfSummary, 0, ProfileMap)) @@ -345,7 +390,7 @@ } std::error_code SampleProfileWriterExtBinary::writeSections( - const StringMap &ProfileMap) { + const SampleProfileMap &ProfileMap) { std::error_code EC; if (SecLayout == DefaultLayout) EC = writeDefaultLayout(ProfileMap); @@ -356,8 +401,8 @@ return EC; } -std::error_code SampleProfileWriterCompactBinary::write( - const StringMap &ProfileMap) { +std::error_code +SampleProfileWriterCompactBinary::write(const SampleProfileMap &ProfileMap) { if (std::error_code EC = SampleProfileWriter::write(ProfileMap)) return EC; if (std::error_code EC = writeFuncOffsetTable()) @@ -376,7 +421,7 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) { auto &OS = *OutputStream; if (FunctionSamples::ProfileIsCS) - OS << "[" << S.getNameWithContext() << "]:" << S.getTotalSamples(); + OS << "[" << S.getContext().toString() << "]:" << S.getTotalSamples(); else OS << S.getName() << ":" << S.getTotalSamples(); @@ -432,27 +477,28 @@ return sampleprof_error::success; } -std::error_code SampleProfileWriterBinary::writeNameIdx(StringRef FName, - bool IsContextName) { - std::string BracketedName; - if (IsContextName) { - BracketedName = "[" + FName.str() + "]"; - FName = StringRef(BracketedName); - } +std::error_code +SampleProfileWriterBinary::writeContextIdx(const SampleContext &Context) { + assert(!Context.hasContext() && "cs profile is not supported"); + return writeNameIdx(Context.getName()); +} - const auto &Ret = NameTable.find(FName); - if (Ret == NameTable.end()) +std::error_code SampleProfileWriterBinary::writeNameIdx(StringRef FName) { + auto &NTable = getNameTable(); + const auto &Ret = NTable.find(FName); + if (Ret == NTable.end()) return sampleprof_error::truncated_name_table; encodeULEB128(Ret->second, *OutputStream); return sampleprof_error::success; } -void SampleProfileWriterBinary::addName(StringRef FName, bool IsContextName) { - if (IsContextName) { - auto It = BracketedContextStr.insert("[" + FName.str() + "]"); - FName = StringRef(*It.first); - } - NameTable.insert(std::make_pair(FName, 0)); +void SampleProfileWriterBinary::addName(StringRef FName) { + auto &NTable = getNameTable(); + NTable.insert(std::make_pair(FName, 0)); +} + +void SampleProfileWriterBinary::addContext(const SampleContext &Context) { + addName(Context.getName()); } void SampleProfileWriterBinary::addNames(const FunctionSamples &S) { @@ -472,7 +518,19 @@ } } -void SampleProfileWriterBinary::stablizeNameTable(std::set &V) { +void SampleProfileWriterExtBinaryBase::addContext( + const SampleContext &Context) { + if (Context.hasContext()) { + for (auto &Callsite : Context.getContextFrames()) + SampleProfileWriterBinary::addName(Callsite.CallerName); + CSNameTable.insert(std::make_pair(Context, 0)); + } else { + SampleProfileWriterBinary::addName(Context.getName()); + } +} + +void SampleProfileWriterBinary::stablizeNameTable( + MapVector &NameTable, std::set &V) { // Sort the names to make NameTable deterministic. for (const auto &I : NameTable) V.insert(I.first); @@ -484,7 +542,7 @@ std::error_code SampleProfileWriterBinary::writeNameTable() { auto &OS = *OutputStream; std::set V; - stablizeNameTable(V); + stablizeNameTable(NameTable, V); // Write out the name table. encodeULEB128(NameTable.size(), OS); @@ -513,8 +571,7 @@ // Write out FuncOffsetTable. for (auto Entry : FuncOffsetTable) { - if (std::error_code EC = - writeNameIdx(Entry.first, FunctionSamples::ProfileIsCS)) + if (std::error_code EC = writeNameIdx(Entry.first)) return EC; encodeULEB128(Entry.second, OS); } @@ -524,7 +581,7 @@ std::error_code SampleProfileWriterCompactBinary::writeNameTable() { auto &OS = *OutputStream; std::set V; - stablizeNameTable(V); + stablizeNameTable(NameTable, V); // Write out the name table. encodeULEB128(NameTable.size(), OS); @@ -543,8 +600,8 @@ return sampleprof_error::success; } -std::error_code SampleProfileWriterBinary::writeHeader( - const StringMap &ProfileMap) { +std::error_code +SampleProfileWriterBinary::writeHeader(const SampleProfileMap &ProfileMap) { writeMagicIdent(Format); computeSummary(ProfileMap); @@ -553,9 +610,8 @@ // Generate the name table for all the functions referenced in the profile. for (const auto &I : ProfileMap) { - assert(I.first() == I.second.getNameWithContext() && - "Inconsistent profile map"); - addName(I.first(), FunctionSamples::ProfileIsCS); + assert(I.first == I.second.getContext() && "Inconsistent profile map"); + addContext(I.first); addNames(I.second); } @@ -629,7 +685,7 @@ } std::error_code SampleProfileWriterExtBinaryBase::writeHeader( - const StringMap &ProfileMap) { + const SampleProfileMap &ProfileMap) { auto &OS = *OutputStream; FileStart = OS.tell(); writeMagicIdent(Format); @@ -639,7 +695,7 @@ } std::error_code SampleProfileWriterCompactBinary::writeHeader( - const StringMap &ProfileMap) { + const SampleProfileMap &ProfileMap) { support::endian::Writer Writer(*OutputStream, support::little); if (auto EC = SampleProfileWriterBinary::writeHeader(ProfileMap)) return EC; @@ -669,9 +725,7 @@ } std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) { auto &OS = *OutputStream; - - if (std::error_code EC = - writeNameIdx(S.getNameWithContext(), FunctionSamples::ProfileIsCS)) + if (std::error_code EC = writeContextIdx(S.getContext())) return EC; encodeULEB128(S.getTotalSamples(), OS); @@ -790,8 +844,7 @@ return std::move(Writer); } -void SampleProfileWriter::computeSummary( - const StringMap &ProfileMap) { +void SampleProfileWriter::computeSummary(const SampleProfileMap &ProfileMap) { SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); Summary = Builder.computeSummaryForProfiles(ProfileMap); } diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp --- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -64,7 +64,7 @@ ContextTrieNode &ContextTrieNode::moveToChildContext( const LineLocation &CallSite, ContextTrieNode &&NodeToMove, - StringRef ContextStrToRemove, bool DeleteNode) { + uint32_t ContextFramesToRemove, bool DeleteNode) { uint32_t Hash = nodeHash(NodeToMove.getFuncName(), CallSite); assert(!AllChildContext.count(Hash) && "Node to remove must exist"); LineLocation OldCallSite = NodeToMove.CallSiteLoc; @@ -86,10 +86,10 @@ FunctionSamples *FSamples = Node->getFunctionSamples(); if (FSamples) { - FSamples->getContext().promoteOnPath(ContextStrToRemove); + FSamples->getContext().promoteOnPath(ContextFramesToRemove); FSamples->getContext().setState(SyntheticContext); - LLVM_DEBUG(dbgs() << " Context promoted to: " << FSamples->getContext() - << "\n"); + LLVM_DEBUG(dbgs() << " Context promoted to: " + << FSamples->getContext().toString() << "\n"); } for (auto &It : Node->getAllChildContext()) { @@ -203,14 +203,14 @@ } // Profiler tracker than manages profiles and its associated context -SampleContextTracker::SampleContextTracker( - StringMap &Profiles) { +SampleContextTracker::SampleContextTracker(SampleProfileMap &Profiles) { for (auto &FuncSample : Profiles) { FunctionSamples *FSamples = &FuncSample.second; - SampleContext Context(FuncSample.first(), RawContext); - LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context << "\n"); + SampleContext Context = FuncSample.first; + LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context.toString() + << "\n"); if (!Context.isBaseContext()) - FuncToCtxtProfiles[Context.getNameWithoutContext()].push_back(FSamples); + FuncToCtxtProfiles[Context.getName()].insert(FSamples); ContextTrieNode *NewNode = getOrCreateContextPath(Context, true); assert(!NewNode->getFunctionSamples() && "New node can't have sample profile"); @@ -234,7 +234,8 @@ if (CalleeContext) { FunctionSamples *FSamples = CalleeContext->getFunctionSamples(); LLVM_DEBUG(if (FSamples) { - dbgs() << " Callee context found: " << FSamples->getContext() << "\n"; + dbgs() << " Callee context found: " << FSamples->getContext().toString() + << "\n"; }); return FSamples; } @@ -326,14 +327,14 @@ // into base profile. for (auto *CSamples : FuncToCtxtProfiles[Name]) { SampleContext &Context = CSamples->getContext(); - ContextTrieNode *FromNode = getContextFor(Context); - if (FromNode == Node) - continue; - // Skip inlined context profile and also don't re-merge any context if (Context.hasState(InlinedContext) || Context.hasState(MergedContext)) continue; + ContextTrieNode *FromNode = getContextFor(Context); + if (FromNode == Node) + continue; + ContextTrieNode &ToNode = promoteMergeContextSamplesTree(*FromNode); assert((!Node || Node == &ToNode) && "Expect only one base profile"); Node = &ToNode; @@ -351,7 +352,7 @@ const FunctionSamples *InlinedSamples) { assert(InlinedSamples && "Expect non-null inlined samples"); LLVM_DEBUG(dbgs() << "Marking context profile as inlined: " - << InlinedSamples->getContext() << "\n"); + << InlinedSamples->getContext().toString() << "\n"); InlinedSamples->getContext().setState(InlinedContext); } @@ -403,13 +404,14 @@ FunctionSamples *FromSamples = NodeToPromo.getFunctionSamples(); assert(FromSamples && "Shouldn't promote a context without profile"); LLVM_DEBUG(dbgs() << " Found context tree root to promote: " - << FromSamples->getContext() << "\n"); + << FromSamples->getContext().toString() << "\n"); assert(!FromSamples->getContext().hasState(InlinedContext) && "Shouldn't promote inlined context profile"); - StringRef ContextStrToRemove = FromSamples->getContext().getCallingContext(); + uint32_t ContextFramesToRemove = + FromSamples->getContext().getContextFrames().size() - 1; return promoteMergeContextSamplesTree(NodeToPromo, RootContext, - ContextStrToRemove); + ContextFramesToRemove); } void SampleContextTracker::dump() { RootContext.dumpTree(); } @@ -474,27 +476,18 @@ SampleContextTracker::getOrCreateContextPath(const SampleContext &Context, bool AllowCreate) { ContextTrieNode *ContextNode = &RootContext; - StringRef ContextRemain = Context; - StringRef ChildContext; - StringRef CalleeName; LineLocation CallSiteLoc(0, 0); - while (ContextNode && !ContextRemain.empty()) { - auto ContextSplit = SampleContext::splitContextString(ContextRemain); - ChildContext = ContextSplit.first; - ContextRemain = ContextSplit.second; - LineLocation NextCallSiteLoc(0, 0); - SampleContext::decodeContextString(ChildContext, CalleeName, - NextCallSiteLoc); - + for (auto &Callsite : Context.getContextFrames()) { // Create child node at parent line/disc location if (AllowCreate) { - ContextNode = - ContextNode->getOrCreateChildContext(CallSiteLoc, CalleeName); + ContextNode = ContextNode->getOrCreateChildContext(CallSiteLoc, + Callsite.CallerName); } else { - ContextNode = ContextNode->getChildContext(CallSiteLoc, CalleeName); + ContextNode = + ContextNode->getChildContext(CallSiteLoc, Callsite.CallerName); } - CallSiteLoc = NextCallSiteLoc; + CallSiteLoc = Callsite.Callsite; } assert((!AllowCreate || ContextNode) && @@ -514,7 +507,7 @@ void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode, - StringRef ContextStrToRemove) { + uint32_t ContextFramesToRemove) { FunctionSamples *FromSamples = FromNode.getFunctionSamples(); FunctionSamples *ToSamples = ToNode.getFunctionSamples(); if (FromSamples && ToSamples) { @@ -526,15 +519,15 @@ // Transfer FromSamples from FromNode to ToNode ToNode.setFunctionSamples(FromSamples); FromSamples->getContext().setState(SyntheticContext); - FromSamples->getContext().promoteOnPath(ContextStrToRemove); + FromSamples->getContext().promoteOnPath(ContextFramesToRemove); FromNode.setFunctionSamples(nullptr); } } ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree( ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent, - StringRef ContextStrToRemove) { - assert(!ContextStrToRemove.empty() && "Context to remove can't be empty"); + uint32_t ContextFramesToRemove) { + assert(ContextFramesToRemove && "Context to remove can't be empty"); // Ignore call site location if destination is top level under root LineLocation NewCallSiteLoc = LineLocation(0, 0); @@ -552,21 +545,21 @@ // Do not delete node to move from its parent here because // caller is iterating over children of that parent node. ToNode = &ToNodeParent.moveToChildContext( - NewCallSiteLoc, std::move(FromNode), ContextStrToRemove, false); + NewCallSiteLoc, std::move(FromNode), ContextFramesToRemove, false); } else { // Destination node exists, merge samples for the context tree - mergeContextNode(FromNode, *ToNode, ContextStrToRemove); + mergeContextNode(FromNode, *ToNode, ContextFramesToRemove); LLVM_DEBUG({ if (ToNode->getFunctionSamples()) dbgs() << " Context promoted and merged to: " - << ToNode->getFunctionSamples()->getContext() << "\n"; + << ToNode->getFunctionSamples()->getContext().toString() << "\n"; }); // Recursively promote and merge children for (auto &It : FromNode.getAllChildContext()) { ContextTrieNode &FromChildNode = It.second; promoteMergeContextSamplesTree(FromChildNode, *ToNode, - ContextStrToRemove); + ContextFramesToRemove); } // Remove children once they're all merged diff --git a/llvm/test/Transforms/SampleProfile/Inputs/csspgo-import-list.prof.extbin b/llvm/test/Transforms/SampleProfile/Inputs/csspgo-import-list.prof.extbin deleted file mode 100644 index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@getProfiles()) { - StringRef FName = PD.getKey(); - const sampleprof::FunctionSamples &FS = PD.getValue(); - auto It = InstrProfileMap.find(FName); + auto &FContext = PD.first; + const sampleprof::FunctionSamples &FS = PD.second; + auto It = InstrProfileMap.find(FContext.toString()); if (FS.getHeadSamples() > ColdSampleThreshold && It != InstrProfileMap.end() && It->second.MaxCount <= ColdInstrThreshold && @@ -690,7 +690,7 @@ bool SampleMergeColdContext, bool SampleTrimColdContext, bool SampleColdContextFrameDepth, FailureMode FailMode) { using namespace sampleprof; - StringMap ProfileMap; + SampleProfileMap ProfileMap; SmallVector, 5> Readers; LLVMContext Context; sampleprof::ProfileSymbolList WriterList; @@ -716,7 +716,7 @@ continue; } - StringMap &Profiles = Reader->getProfiles(); + SampleProfileMap &Profiles = Reader->getProfiles(); if (ProfileIsProbeBased.hasValue() && ProfileIsProbeBased != FunctionSamples::ProfileIsProbeBased) exitWithError( @@ -725,19 +725,19 @@ if (ProfileIsCS.hasValue() && ProfileIsCS != FunctionSamples::ProfileIsCS) exitWithError("cannot merge CS profile with non-CS profile"); ProfileIsCS = FunctionSamples::ProfileIsCS; - for (StringMap::iterator I = Profiles.begin(), - E = Profiles.end(); + for (SampleProfileMap::iterator I = Profiles.begin(), E = Profiles.end(); I != E; ++I) { sampleprof_error Result = sampleprof_error::success; FunctionSamples Remapped = Remapper ? remapSamples(I->second, *Remapper, Result) : FunctionSamples(); FunctionSamples &Samples = Remapper ? Remapped : I->second; - StringRef FName = Samples.getNameWithContext(); - MergeResult(Result, ProfileMap[FName].merge(Samples, Input.Weight)); + SampleContext FContext = Samples.getContext(); + MergeResult(Result, ProfileMap[FContext].merge(Samples, Input.Weight)); if (Result != sampleprof_error::success) { std::error_code EC = make_error_code(Result); - handleMergeWriterError(errorCodeToError(EC), Input.Filename, FName); + handleMergeWriterError(errorCodeToError(EC), Input.Filename, + FContext.toString()); } } @@ -1022,8 +1022,8 @@ namespace { struct SampleOverlapStats { - StringRef BaseName; - StringRef TestName; + SampleContext BaseName; + SampleContext TestName; // Number of overlap units uint64_t OverlapCount; // Total samples of overlap units @@ -1226,6 +1226,9 @@ /// Load profiles specified by BaseFilename and TestFilename. std::error_code loadProfiles(); + using FuncSampleStatsMap = + std::unordered_map; + private: SampleOverlapStats ProfOverlap; SampleOverlapStats HotFuncOverlap; @@ -1236,8 +1239,8 @@ std::unique_ptr TestReader; // BaseStats and TestStats hold FuncSampleStats for each function, with // function name as the key. - StringMap BaseStats; - StringMap TestStats; + FuncSampleStatsMap BaseStats; + FuncSampleStatsMap TestStats; // Low similarity threshold in floating point number double LowSimilarityThreshold; // Block samples above BaseHotThreshold or TestHotThreshold are considered hot @@ -1276,8 +1279,8 @@ void updateHotBlockOverlap(uint64_t BaseSample, uint64_t TestSample, uint64_t HotBlockCount); - void getHotFunctions(const StringMap &ProfStats, - StringMap &HotFunc, + void getHotFunctions(const FuncSampleStatsMap &ProfStats, + FuncSampleStatsMap &HotFunc, uint64_t HotThreshold) const; void computeHotFuncOverlap(); @@ -1381,26 +1384,26 @@ } void SampleOverlapAggregator::getHotFunctions( - const StringMap &ProfStats, - StringMap &HotFunc, uint64_t HotThreshold) const { + const FuncSampleStatsMap &ProfStats, FuncSampleStatsMap &HotFunc, + uint64_t HotThreshold) const { for (const auto &F : ProfStats) { if (isFunctionHot(F.second, HotThreshold)) - HotFunc.try_emplace(F.first(), F.second); + HotFunc.emplace(F.first, F.second); } } void SampleOverlapAggregator::computeHotFuncOverlap() { - StringMap BaseHotFunc; + FuncSampleStatsMap BaseHotFunc; getHotFunctions(BaseStats, BaseHotFunc, BaseHotThreshold); HotFuncOverlap.BaseCount = BaseHotFunc.size(); - StringMap TestHotFunc; + FuncSampleStatsMap TestHotFunc; getHotFunctions(TestStats, TestHotFunc, TestHotThreshold); HotFuncOverlap.TestCount = TestHotFunc.size(); HotFuncOverlap.UnionCount = HotFuncOverlap.TestCount; for (const auto &F : BaseHotFunc) { - if (TestHotFunc.count(F.first())) + if (TestHotFunc.count(F.first)) ++HotFuncOverlap.OverlapCount; else ++HotFuncOverlap.UnionCount; @@ -1612,18 +1615,19 @@ void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) { using namespace sampleprof; - StringMap BaseFuncProf; + std::unordered_map + BaseFuncProf; const auto &BaseProfiles = BaseReader->getProfiles(); for (const auto &BaseFunc : BaseProfiles) { - BaseFuncProf.try_emplace(BaseFunc.second.getNameWithContext(), - &(BaseFunc.second)); + BaseFuncProf.emplace(BaseFunc.second.getContext(), &(BaseFunc.second)); } ProfOverlap.UnionCount = BaseFuncProf.size(); const auto &TestProfiles = TestReader->getProfiles(); for (const auto &TestFunc : TestProfiles) { SampleOverlapStats FuncOverlap; - FuncOverlap.TestName = TestFunc.second.getNameWithContext(); + FuncOverlap.TestName = TestFunc.second.getContext(); assert(TestStats.count(FuncOverlap.TestName) && "TestStats should have records for all functions in test profile " "except inlinees"); @@ -1650,7 +1654,7 @@ // Two functions match with each other. Compute function-level overlap and // aggregate them into profile-level overlap. - FuncOverlap.BaseName = Match->second->getNameWithContext(); + FuncOverlap.BaseName = Match->second->getContext(); assert(BaseStats.count(FuncOverlap.BaseName) && "BaseStats should have records for all functions in base profile " "except inlinees"); @@ -1683,8 +1687,8 @@ (Match != BaseFuncProf.end() && FuncOverlap.Similarity < LowSimilarityThreshold) || (Match != BaseFuncProf.end() && !FuncFilter.NameFilter.empty() && - FuncOverlap.BaseName.find(FuncFilter.NameFilter) != - FuncOverlap.BaseName.npos)) { + FuncOverlap.BaseName.toString().find(FuncFilter.NameFilter) != + std::string::npos)) { assert(ProfOverlap.BaseSample > 0 && "Total samples in base profile should be greater than 0"); FuncOverlap.BaseWeight = @@ -1699,11 +1703,10 @@ // Traverse through functions in base profile but not in test profile. for (const auto &F : BaseFuncProf) { - assert(BaseStats.count(F.second->getNameWithContext()) && + assert(BaseStats.count(F.second->getContext()) && "BaseStats should have records for all functions in base profile " "except inlinees"); - const FuncSampleStats &FuncStats = - BaseStats[F.second->getNameWithContext()]; + const FuncSampleStats &FuncStats = BaseStats[F.second->getContext()]; ++ProfOverlap.BaseUniqueCount; ProfOverlap.BaseUniqueSample += FuncStats.SampleSum; @@ -1734,7 +1737,7 @@ FuncSampleStats FuncStats; getFuncSampleStats(I.second, FuncStats, BaseHotThreshold); ProfOverlap.BaseSample += FuncStats.SampleSum; - BaseStats.try_emplace(I.second.getNameWithContext(), FuncStats); + BaseStats.emplace(I.second.getContext(), FuncStats); } const auto &TestProf = TestReader->getProfiles(); @@ -1743,7 +1746,7 @@ FuncSampleStats FuncStats; getFuncSampleStats(I.second, FuncStats, TestHotThreshold); ProfOverlap.TestSample += FuncStats.SampleSum; - TestStats.try_emplace(I.second.getNameWithContext(), FuncStats); + TestStats.emplace(I.second.getContext(), FuncStats); } ProfOverlap.BaseName = StringRef(BaseFilename); @@ -1807,13 +1810,15 @@ FOS.PadToColumn(TestSampleCol); FOS << F.second.TestSample; FOS.PadToColumn(FuncNameCol); - FOS << F.second.TestName << "\n"; + FOS << F.second.TestName.toString() << "\n"; } } void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream &OS) const { - OS << "Profile overlap infomation for base_profile: " << ProfOverlap.BaseName - << " and test_profile: " << ProfOverlap.TestName << "\nProgram level:\n"; + OS << "Profile overlap infomation for base_profile: " + << ProfOverlap.BaseName.toString() + << " and test_profile: " << ProfOverlap.TestName.toString() + << "\nProgram level:\n"; OS << " Whole program profile similarity: " << format("%.3f%%", ProfOverlap.Similarity * 100) << "\n"; @@ -2271,7 +2276,7 @@ namespace { struct HotFuncInfo { - StringRef FuncName; + std::string FuncName; uint64_t TotalCount; double TotalCountPercent; uint64_t MaxCount; @@ -2282,8 +2287,8 @@ EntryCount(0) {} HotFuncInfo(StringRef FN, uint64_t TS, double TSP, uint64_t MS, uint64_t ES) - : FuncName(FN), TotalCount(TS), TotalCountPercent(TSP), MaxCount(MS), - EntryCount(ES) {} + : FuncName(FN.begin(), FN.end()), TotalCount(TS), TotalCountPercent(TSP), + MaxCount(MS), EntryCount(ES) {} }; } // namespace @@ -2339,9 +2344,8 @@ } } -static int -showHotFunctionList(const StringMap &Profiles, - ProfileSummary &PS, raw_fd_ostream &OS) { +static int showHotFunctionList(const sampleprof::SampleProfileMap &Profiles, + ProfileSummary &PS, raw_fd_ostream &OS) { using namespace sampleprof; const uint32_t HotFuncCutoff = 990000; @@ -2391,8 +2395,8 @@ ? (Func.getTotalSamples() * 100.0) / ProfileTotalSample : 0; PrintValues.emplace_back(HotFuncInfo( - Func.getNameWithContext(), Func.getTotalSamples(), TotalSamplePercent, - FuncPair.second.second, Func.getEntrySamples())); + Func.getContext().toString(), Func.getTotalSamples(), + TotalSamplePercent, FuncPair.second.second, Func.getEntrySamples())); } dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount, Profiles.size(), HotFuncSample, ProfileTotalSample, @@ -2426,7 +2430,8 @@ if (ShowAllFunctions || ShowFunction.empty()) Reader->dump(OS); else - Reader->dumpFunctionProfile(ShowFunction, OS); + // TODO: parse context string to support filtering by contexts. + Reader->dumpFunctionProfile(StringRef(ShowFunction), OS); if (ShowProfileSymbolList) { std::unique_ptr ReaderList = diff --git a/llvm/tools/llvm-profgen/CSPreInliner.h b/llvm/tools/llvm-profgen/CSPreInliner.h --- a/llvm/tools/llvm-profgen/CSPreInliner.h +++ b/llvm/tools/llvm-profgen/CSPreInliner.h @@ -67,7 +67,7 @@ // size by only keep context that is estimated to be inlined. class CSPreInliner { public: - CSPreInliner(StringMap &Profiles, ProfiledBinary &Binary, + CSPreInliner(SampleProfileMap &Profiles, ProfiledBinary &Binary, uint64_t HotThreshold, uint64_t ColdThreshold); void run(); @@ -80,7 +80,7 @@ uint32_t getFuncSize(const FunctionSamples &FSamples); bool UseContextCost; SampleContextTracker ContextTracker; - StringMap &ProfileMap; + SampleProfileMap &ProfileMap; ProfiledBinary &Binary; // Count thresholds to answer isHotCount and isColdCount queries. diff --git a/llvm/tools/llvm-profgen/CSPreInliner.cpp b/llvm/tools/llvm-profgen/CSPreInliner.cpp --- a/llvm/tools/llvm-profgen/CSPreInliner.cpp +++ b/llvm/tools/llvm-profgen/CSPreInliner.cpp @@ -40,9 +40,8 @@ cl::desc( "Replay previous inlining and adjust context profile accordingly")); -CSPreInliner::CSPreInliner(StringMap &Profiles, - ProfiledBinary &Binary, uint64_t HotThreshold, - uint64_t ColdThreshold) +CSPreInliner::CSPreInliner(SampleProfileMap &Profiles, ProfiledBinary &Binary, + uint64_t HotThreshold, uint64_t ColdThreshold) : UseContextCost(UseContextCostForPreInliner), ContextTracker(Profiles), ProfileMap(Profiles), Binary(Binary), HotCountThreshold(HotThreshold), ColdCountThreshold(ColdThreshold) {} @@ -169,7 +168,7 @@ } LLVM_DEBUG(dbgs() << (ShouldInline ? " Inlined" : " Outlined") << " context profile for: " - << Candidate.CalleeSamples->getNameWithContext() + << Candidate.CalleeSamples->getContext().toString() << " (callee size: " << Candidate.SizeCost << ", call count:" << Candidate.CallsiteCount << ")\n"); } @@ -186,7 +185,7 @@ CQueue.pop(); bool WasInlined = Candidate.CalleeSamples->getContext().hasAttribute(ContextWasInlined); - dbgs() << " " << Candidate.CalleeSamples->getNameWithContext() + dbgs() << " " << Candidate.CalleeSamples->getContext().toString() << " (candidate size:" << Candidate.SizeCost << ", call count: " << Candidate.CallsiteCount << ", previously " << (WasInlined ? "inlined)\n" : "not inlined)\n"); @@ -196,13 +195,12 @@ void CSPreInliner::run() { #ifndef NDEBUG - auto printProfileNames = [](StringMap &Profiles, - bool IsInput) { + auto printProfileNames = [](SampleProfileMap &Profiles, bool IsInput) { dbgs() << (IsInput ? "Input" : "Output") << " context-sensitive profiles (" << Profiles.size() << " total):\n"; for (auto &It : Profiles) { const FunctionSamples &Samples = It.second; - dbgs() << " [" << Samples.getNameWithContext() << "] " + dbgs() << " [" << Samples.getContext().toString() << "] " << Samples.getTotalSamples() << ":" << Samples.getHeadSamples() << "\n"; } @@ -224,17 +222,17 @@ // Not inlined context profiles are merged into its base, so we can // trim out such profiles from the output. - std::vector ProfilesToBeRemoved; + std::vector ProfilesToBeRemoved; for (auto &It : ProfileMap) { SampleContext Context = It.second.getContext(); if (!Context.isBaseContext() && !Context.hasState(InlinedContext)) { assert(Context.hasState(MergedContext) && "Not inlined context profile should be merged already"); - ProfilesToBeRemoved.push_back(It.first()); + ProfilesToBeRemoved.push_back(It.first); } } - for (StringRef ContextName : ProfilesToBeRemoved) { + for (auto &ContextName : ProfilesToBeRemoved) { ProfileMap.erase(ContextName); } diff --git a/llvm/tools/llvm-profgen/CallContext.h b/llvm/tools/llvm-profgen/CallContext.h --- a/llvm/tools/llvm-profgen/CallContext.h +++ b/llvm/tools/llvm-profgen/CallContext.h @@ -17,25 +17,20 @@ namespace llvm { namespace sampleprof { -// Function name, LineLocation -typedef std::pair FrameLocation; - -typedef SmallVector FrameLocationStack; - -inline std::string getCallSite(const FrameLocation &Callsite) { - std::string CallsiteStr = Callsite.first; +inline std::string getCallSite(const SampleContextFrame &Callsite) { + std::string CallsiteStr = Callsite.CallerName.str(); CallsiteStr += ":"; - CallsiteStr += Twine(Callsite.second.LineOffset).str(); - if (Callsite.second.Discriminator > 0) { + CallsiteStr += Twine(Callsite.Callsite.LineOffset).str(); + if (Callsite.Callsite.Discriminator > 0) { CallsiteStr += "."; - CallsiteStr += Twine(Callsite.second.Discriminator).str(); + CallsiteStr += Twine(Callsite.Callsite.Discriminator).str(); } return CallsiteStr; } // TODO: This operation is expansive. If it ever gets called multiple times we // may think of making a class wrapper with internal states for it. -inline std::string getLocWithContext(const FrameLocationStack &Context) { +inline std::string getLocWithContext(const SampleContextFrameVector &Context) { std::ostringstream OContextStr; for (const auto &Callsite : Context) { if (OContextStr.str().size()) @@ -48,7 +43,7 @@ // Reverse call context, i.e., in the order of callee frames to caller frames, // is useful during instruction printing or pseudo probe printing. inline std::string -getReversedLocWithContext(const FrameLocationStack &Context) { +getReversedLocWithContext(const SampleContextFrameVector &Context) { std::ostringstream OContextStr; for (const auto &Callsite : reverse(Context)) { if (OContextStr.str().size()) diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h --- a/llvm/tools/llvm-profgen/PerfReader.h +++ b/llvm/tools/llvm-profgen/PerfReader.h @@ -344,7 +344,8 @@ // String based context id struct StringBasedCtxKey : public ContextKey { - std::string Context; + SampleContextFrameVector Context; + bool WasLeafInlined; StringBasedCtxKey() : ContextKey(CK_StringBased), WasLeafInlined(false){}; static bool classof(const ContextKey *K) { @@ -356,7 +357,7 @@ return Context == Other->Context; } - void genHashCode() { HashCode = hash_value(Context); } + void genHashCode() { HashCode = hash_value(SampleContextFrames(Context)); } }; // Probe based context key as the intermediate key of context diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -92,8 +92,7 @@ std::shared_ptr FrameStack::getContextKey() { std::shared_ptr KeyStr = std::make_shared(); - KeyStr->Context = - Binary->getExpandedContextStr(Stack, KeyStr->WasLeafInlined); + KeyStr->Context = Binary->getExpandedContext(Stack, KeyStr->WasLeafInlined); if (KeyStr->Context.empty()) return nullptr; KeyStr->genHashCode(); @@ -321,21 +320,19 @@ static std::string getContextKeyStr(ContextKey *K, const ProfiledBinary *Binary) { - std::string ContextStr; if (const auto *CtxKey = dyn_cast(K)) { - return CtxKey->Context; + return SampleContext::getContextString(CtxKey->Context); } else if (const auto *CtxKey = dyn_cast(K)) { - SmallVector ContextStack; + SampleContextFrameVector ContextStack; for (const auto *Probe : CtxKey->Probes) { Binary->getInlineContextForProbe(Probe, ContextStack, true); } - for (const auto &Context : ContextStack) { - if (ContextStr.size()) - ContextStr += " @ "; - ContextStr += Context; - } + // Probe context key at this point does not have leaf probe, so do not + // include the leaf inline location. + return SampleContext::getContextString(ContextStack, true); + } else { + llvm_unreachable("unexpected key type"); } - return ContextStr; } static void printRangeCounter(ContextSampleCounterMap &Counter, diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -33,7 +33,7 @@ virtual void generateProfile() = 0; // Use SampleProfileWriter to serialize profile map virtual void write(std::unique_ptr Writer, - StringMap &ProfileMap); + SampleProfileMap &ProfileMap); void write(); protected: @@ -56,7 +56,7 @@ const RangeSample &Ranges); // Used by SampleProfileWriter - StringMap ProfileMap; + SampleProfileMap ProfileMap; ProfiledBinary *Binary = nullptr; }; @@ -190,31 +190,28 @@ protected: // Lookup or create FunctionSamples for the context - FunctionSamples &getFunctionProfileForContext(StringRef ContextId, + FunctionSamples &getFunctionProfileForContext(SampleContextFrames ContextId, bool WasLeafInlined = false); // Post processing for profiles before writing out, such as mermining // and trimming cold profiles, running preinliner on profiles. void postProcessProfiles(); void computeSummaryAndThreshold(); void write(std::unique_ptr Writer, - StringMap &ProfileMap) override; + SampleProfileMap &ProfileMap) override; // Thresholds from profile summary to answer isHotCount/isColdCount queries. uint64_t HotCountThreshold; uint64_t ColdCountThreshold; - // String table owning context strings created from profile generation. - std::unordered_set ContextStrings; - private: // Helper function for updating body sample for a leaf location in // FunctionProfile void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile, - const FrameLocation &LeafLoc, + const SampleContextFrame &LeafLoc, uint64_t Count); void populateFunctionBodySamples(FunctionSamples &FunctionProfile, const RangeSample &RangeCounters); - void populateFunctionBoundarySamples(StringRef ContextId, + void populateFunctionBoundarySamples(SampleContextFrames ContextId, FunctionSamples &FunctionProfile, const BranchSample &BranchCounters); void populateInferredFunctionSamples(); @@ -243,22 +240,18 @@ void extractProbesFromRange(const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter); // Fill in function body samples from probes - void - populateBodySamplesWithProbes(const RangeSample &RangeCounter, - SmallVectorImpl &ContextStrStack); + void populateBodySamplesWithProbes(const RangeSample &RangeCounter, + SampleContextFrames ContextStack); // Fill in boundary samples for a call probe - void populateBoundarySamplesWithProbes( - const BranchSample &BranchCounter, - SmallVectorImpl &ContextStrStack); - // Helper function to get FunctionSamples for the leaf inlined context - FunctionSamples & - getFunctionProfileForLeafProbe(SmallVectorImpl &ContextStrStack, - const MCPseudoProbeFuncDesc *LeafFuncDesc, - bool WasLeafInlined); + void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter, + SampleContextFrames ContextStack); // Helper function to get FunctionSamples for the leaf probe FunctionSamples & - getFunctionProfileForLeafProbe(SmallVectorImpl &ContextStrStack, + getFunctionProfileForLeafProbe(SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe); + + // Underlying context table serves for sample profile writer. + std::unordered_set Contexts; }; } // end namespace sampleprof diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -90,7 +90,7 @@ } void ProfileGenerator::write(std::unique_ptr Writer, - StringMap &ProfileMap) { + SampleProfileMap &ProfileMap) { if (std::error_code EC = Writer->write(ProfileMap)) exitWithError(std::move(EC)); } @@ -201,19 +201,16 @@ } FunctionSamples & -CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr, +CSProfileGenerator::getFunctionProfileForContext(SampleContextFrames Context, bool WasLeafInlined) { - auto Ret = ProfileMap.try_emplace(ContextStr, FunctionSamples()); + SampleContext FContext(Context); + auto Ret = ProfileMap.emplace(Context, FunctionSamples()); if (Ret.second) { - // Make a copy of the underlying context string in string table - // before StringRef wrapper is used for context. - auto It = ContextStrings.insert(ContextStr.str()); - SampleContext FContext(*It.first, RawContext); + SampleContext FContext(Context, RawContext); if (WasLeafInlined) FContext.setAttribute(ContextWasInlined); FunctionSamples &FProfile = Ret.first->second; FProfile.setContext(FContext); - FProfile.setName(FContext.getNameWithoutContext()); } return Ret.first->second; } @@ -223,15 +220,14 @@ for (const auto &CI : SampleCounters) { const StringBasedCtxKey *CtxKey = dyn_cast(CI.first.getPtr()); - StringRef ContextId(CtxKey->Context); // Get or create function profile for the range FunctionSamples &FunctionProfile = - getFunctionProfileForContext(ContextId, CtxKey->WasLeafInlined); + getFunctionProfileForContext(CtxKey->Context, CtxKey->WasLeafInlined); // Fill in function body samples populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter); // Fill in boundary sample counts as well as call site samples for calls - populateFunctionBoundarySamples(ContextId, FunctionProfile, + populateFunctionBoundarySamples(CtxKey->Context, FunctionProfile, CI.second.BranchCounter); } // Fill in call site value sample for inlined calls and also use context to @@ -244,18 +240,18 @@ } void CSProfileGenerator::updateBodySamplesforFunctionProfile( - FunctionSamples &FunctionProfile, const FrameLocation &LeafLoc, + FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc, uint64_t Count) { // Filter out invalid negative(int type) lineOffset - if (LeafLoc.second.LineOffset & 0x80000000) + if (LeafLoc.Callsite.LineOffset & 0x80000000) return; // Use the maximum count of samples with same line location ErrorOr R = FunctionProfile.findSamplesAt( - LeafLoc.second.LineOffset, LeafLoc.second.Discriminator); + LeafLoc.Callsite.LineOffset, LeafLoc.Callsite.Discriminator); uint64_t PreviousCount = R ? R.get() : 0; if (PreviousCount < Count) { - FunctionProfile.addBodySamples(LeafLoc.second.LineOffset, - LeafLoc.second.Discriminator, + FunctionProfile.addBodySamples(LeafLoc.Callsite.LineOffset, + LeafLoc.Callsite.Discriminator, Count - PreviousCount); } } @@ -299,7 +295,7 @@ } void CSProfileGenerator::populateFunctionBoundarySamples( - StringRef ContextId, FunctionSamples &FunctionProfile, + SampleContextFrames ContextId, FunctionSamples &FunctionProfile, const BranchSample &BranchCounters) { for (auto Entry : BranchCounters) { @@ -316,44 +312,36 @@ auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset); if (!LeafLoc.hasValue()) continue; - FunctionProfile.addCalledTargetSamples(LeafLoc->second.LineOffset, - LeafLoc->second.Discriminator, + FunctionProfile.addCalledTargetSamples(LeafLoc->Callsite.LineOffset, + LeafLoc->Callsite.Discriminator, CalleeName, Count); // Record head sample for called target(callee) - std::ostringstream OCalleeCtxStr; - if (ContextId.find(" @ ") != StringRef::npos) { - OCalleeCtxStr << ContextId.rsplit(" @ ").first.str(); - OCalleeCtxStr << " @ "; - } - OCalleeCtxStr << getCallSite(*LeafLoc) << " @ " << CalleeName.str(); - - FunctionSamples &CalleeProfile = - getFunctionProfileForContext(OCalleeCtxStr.str()); + SampleContextFrameVector CalleeCtx(ContextId.begin(), ContextId.end()); + assert(CalleeCtx.back().CallerName == LeafLoc->CallerName && + "Leaf function name doesn't match"); + CalleeCtx.back() = *LeafLoc; + CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0)); + FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx); assert(Count != 0 && "Unexpected zero weight branch"); CalleeProfile.addHeadSamples(Count); } } -static FrameLocation getCallerContext(StringRef CalleeContext, - StringRef &CallerNameWithContext) { - StringRef CallerContext = CalleeContext.rsplit(" @ ").first; - CallerNameWithContext = CallerContext.rsplit(':').first; - auto ContextSplit = CallerContext.rsplit(" @ "); - StringRef CallerFrameStr = ContextSplit.second.size() == 0 - ? ContextSplit.first - : ContextSplit.second; - FrameLocation LeafFrameLoc = {"", {0, 0}}; - StringRef Funcname; - SampleContext::decodeContextString(CallerFrameStr, Funcname, - LeafFrameLoc.second); - LeafFrameLoc.first = Funcname.str(); - return LeafFrameLoc; +static SampleContextFrame +getCallerContext(SampleContextFrames CalleeContext, + SampleContextFrameVector &CallerContext) { + assert(CalleeContext.size() > 1 && "Unexpected empty context"); + CalleeContext = CalleeContext.drop_back(); + CallerContext.assign(CalleeContext.begin(), CalleeContext.end()); + SampleContextFrame CallerFrame = CallerContext.back(); + CallerContext.back().Callsite = LineLocation(0, 0); + return CallerFrame; } void CSProfileGenerator::populateInferredFunctionSamples() { for (const auto &Item : ProfileMap) { - const StringRef CalleeContext = Item.first(); + const auto &CalleeContext = Item.first; const FunctionSamples &CalleeProfile = Item.second; // If we already have head sample counts, we must have value profile @@ -362,21 +350,22 @@ continue; // If we don't have context, nothing to do for caller's call site. // This could happen for entry point function. - if (CalleeContext.find(" @ ") == StringRef::npos) + if (CalleeContext.isBaseContext()) continue; // Infer Caller's frame loc and context ID through string splitting - StringRef CallerContextId; - FrameLocation &&CallerLeafFrameLoc = - getCallerContext(CalleeContext, CallerContextId); + SampleContextFrameVector CallerContextId; + SampleContextFrame &&CallerLeafFrameLoc = + getCallerContext(CalleeContext.getContextFrames(), CallerContextId); + SampleContextFrames CallerContext(CallerContextId); // It's possible that we haven't seen any sample directly in the caller, // in which case CallerProfile will not exist. But we can't modify // ProfileMap while iterating it. // TODO: created function profile for those callers too - if (ProfileMap.find(CallerContextId) == ProfileMap.end()) + if (ProfileMap.find(CallerContext) == ProfileMap.end()) continue; - FunctionSamples &CallerProfile = ProfileMap[CallerContextId]; + FunctionSamples &CallerProfile = ProfileMap[CallerContext]; // Since we don't have call count for inlined functions, we // estimate it from inlinee's profile using entry body sample. @@ -385,11 +374,11 @@ if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size()) EstimatedCallCount = 1; CallerProfile.addCalledTargetSamples( - CallerLeafFrameLoc.second.LineOffset, - CallerLeafFrameLoc.second.Discriminator, - CalleeProfile.getContext().getNameWithoutContext(), EstimatedCallCount); - CallerProfile.addBodySamples(CallerLeafFrameLoc.second.LineOffset, - CallerLeafFrameLoc.second.Discriminator, + CallerLeafFrameLoc.Callsite.LineOffset, + CallerLeafFrameLoc.Callsite.Discriminator, + CalleeProfile.getContext().getName(), EstimatedCallCount); + CallerProfile.addBodySamples(CallerLeafFrameLoc.Callsite.LineOffset, + CallerLeafFrameLoc.Callsite.Discriminator, EstimatedCallCount); CallerProfile.addTotalSamples(EstimatedCallCount); } @@ -434,7 +423,7 @@ } void CSProfileGenerator::write(std::unique_ptr Writer, - StringMap &ProfileMap) { + SampleProfileMap &ProfileMap) { if (std::error_code EC = Writer->write(ProfileMap)) exitWithError(std::move(EC)); } @@ -443,11 +432,11 @@ // Extract context stack for reusing, leaf context stack will // be added compressed while looking up function profile static void extractPrefixContextStack( - SmallVectorImpl &ContextStrStack, + SampleContextFrameVector &ContextStack, const SmallVectorImpl &Probes, ProfiledBinary *Binary) { for (const auto *P : Probes) { - Binary->getInlineContextForProbe(P, ContextStrStack, true); + Binary->getInlineContextForProbe(P, ContextStack, true); } } @@ -458,13 +447,13 @@ for (const auto &CI : SampleCounters) { const ProbeBasedCtxKey *CtxKey = dyn_cast(CI.first.getPtr()); - SmallVector ContextStrStack; - extractPrefixContextStack(ContextStrStack, CtxKey->Probes, Binary); + SampleContextFrameVector ContextStack; + extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary); // Fill in function body samples from probes, also infer caller's samples // from callee's probe - populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStrStack); + populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack); // Fill in boundary samples for a call probe - populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStrStack); + populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack); } postProcessProfiles(); @@ -509,8 +498,7 @@ } void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes( - const RangeSample &RangeCounter, - SmallVectorImpl &ContextStrStack) { + const RangeSample &RangeCounter, SampleContextFrames ContextStack) { ProbeCounterMap ProbeCounter; // Extract the top frame probes by looking up each address among the range in // the Address2ProbeMap @@ -522,7 +510,7 @@ const MCDecodedPseudoProbe *Probe = PI.first; uint64_t Count = PI.second; FunctionSamples &FunctionProfile = - getFunctionProfileForLeafProbe(ContextStrStack, Probe); + getFunctionProfileForLeafProbe(ContextStack, Probe); // Record the current frame and FunctionProfile whenever samples are // collected for non-danglie probes. This is for reporting all of the // zero count probes of the frame later. @@ -537,22 +525,24 @@ // Since the context id will be compressed, we have to use callee's // context id to infer caller's context id to ensure they share the // same context prefix. - StringRef CalleeContextId = - FunctionProfile.getContext().getNameWithContext(); - StringRef CallerContextId; - FrameLocation &&CallerLeafFrameLoc = + SampleContextFrames CalleeContextId = + FunctionProfile.getContext().getContextFrames(); + SampleContextFrameVector CallerContextId; + SampleContextFrame &&CallerLeafFrameLoc = getCallerContext(CalleeContextId, CallerContextId); - uint64_t CallerIndex = CallerLeafFrameLoc.second.LineOffset; + uint64_t CallerIndex = CallerLeafFrameLoc.Callsite.LineOffset; assert(CallerIndex && "Inferred caller's location index shouldn't be zero!"); + // Save the new context for future references. + SampleContextFrames CallerContext = + *Contexts.insert(CallerContextId).first; FunctionSamples &CallerProfile = - getFunctionProfileForContext(CallerContextId); + getFunctionProfileForContext(CallerContext); CallerProfile.setFunctionHash(InlinerDesc->FuncHash); CallerProfile.addBodySamples(CallerIndex, 0, Count); CallerProfile.addTotalSamples(Count); CallerProfile.addCalledTargetSamples( - CallerIndex, 0, - FunctionProfile.getContext().getNameWithoutContext(), Count); + CallerIndex, 0, FunctionProfile.getContext().getName(), Count); } } } @@ -570,8 +560,7 @@ } void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes( - const BranchSample &BranchCounter, - SmallVectorImpl &ContextStrStack) { + const BranchSample &BranchCounter, SampleContextFrames ContextStack) { for (auto BI : BranchCounter) { uint64_t SourceOffset = BI.first.first; uint64_t TargetOffset = BI.first.second; @@ -582,7 +571,7 @@ if (CallProbe == nullptr) continue; FunctionSamples &FunctionProfile = - getFunctionProfileForLeafProbe(ContextStrStack, CallProbe); + getFunctionProfileForLeafProbe(ContextStack, CallProbe); FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count); FunctionProfile.addTotalSamples(Count); StringRef CalleeName = FunctionSamples::getCanonicalFnName( @@ -595,46 +584,31 @@ } FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe( - SmallVectorImpl &ContextStrStack, - const MCPseudoProbeFuncDesc *LeafFuncDesc, bool WasLeafInlined) { - assert(ContextStrStack.size() && "Profile context must have the leaf frame"); - // Compress the context string except for the leaf frame - std::string LeafFrame = ContextStrStack.back(); - ContextStrStack.pop_back(); - CSProfileGenerator::compressRecursionContext(ContextStrStack); - CSProfileGenerator::trimContext(ContextStrStack); - - std::ostringstream OContextStr; - for (uint32_t I = 0; I < ContextStrStack.size(); I++) { - if (OContextStr.str().size()) - OContextStr << " @ "; - OContextStr << ContextStrStack[I]; - } + SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) { + + // Explicitly copy the context for appending the leaf context + SampleContextFrameVector NewContextStack(ContextStack.begin(), + ContextStack.end()); + Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true); // For leaf inlined context with the top frame, we should strip off the top // frame's probe id, like: // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" - if (OContextStr.str().size()) - OContextStr << " @ "; - OContextStr << StringRef(LeafFrame).split(":").first.str(); - - FunctionSamples &FunctionProile = - getFunctionProfileForContext(OContextStr.str(), WasLeafInlined); - FunctionProile.setFunctionHash(LeafFuncDesc->FuncHash); - return FunctionProile; -} - -FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe( - SmallVectorImpl &ContextStrStack, - const MCDecodedPseudoProbe *LeafProbe) { + auto LeafFrame = NewContextStack.back(); + LeafFrame.Callsite = LineLocation(0, 0); + NewContextStack.pop_back(); + // Compress the context string except for the leaf frame + CSProfileGenerator::compressRecursionContext(NewContextStack); + CSProfileGenerator::trimContext(NewContextStack); + NewContextStack.push_back(LeafFrame); + // Save the new context for future references. + SampleContextFrames NewContext = *Contexts.insert(NewContextStack).first; - // Explicitly copy the context for appending the leaf context - SmallVector ContextStrStackCopy(ContextStrStack.begin(), - ContextStrStack.end()); - Binary->getInlineContextForProbe(LeafProbe, ContextStrStackCopy, true); const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid()); bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite(); - return getFunctionProfileForLeafProbe(ContextStrStackCopy, FuncDesc, - WasLeafInlined); + FunctionSamples &FunctionProile = + getFunctionProfileForContext(NewContext, WasLeafInlined); + FunctionProile.setFunctionHash(FuncDesc->FuncHash); + return FunctionProile; } } // end namespace sampleprof diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -110,7 +110,7 @@ class BinarySizeContextTracker { public: // Add instruction with given size to a context - void addInstructionForContext(const FrameLocationStack &Context, + void addInstructionForContext(const SampleContextFrameVector &Context, uint32_t InstrSize); // Get function size with a specific context. When there's no exact match @@ -164,7 +164,7 @@ // Function offset to name mapping. std::unordered_map FuncStartAddrMap; // Offset to context location map. Used to expand the context. - std::unordered_map Offset2LocStackMap; + std::unordered_map Offset2LocStackMap; // An array of offsets of all instructions sorted in increasing order. The // sorting is needed to fast advance to the next forward/backward instruction. std::vector CodeAddrs; @@ -182,6 +182,9 @@ // The symbolizer used to get inline context for an instruction. std::unique_ptr Symbolizer; + // String table owning function name strings created from the symbolizer. + std::unordered_set NameStrings; + // Pseudo probe decoder MCPseudoProbeDecoder ProbeDecoder; @@ -214,9 +217,9 @@ bool dissassembleSymbol(std::size_t SI, ArrayRef Bytes, SectionSymbolsTy &Symbols, const SectionRef &Section); /// Symbolize a given instruction pointer and return a full call context. - FrameLocationStack symbolize(const InstructionPointer &IP, - bool UseCanonicalFnName = false, - bool UseProbeDiscriminator = false); + SampleContextFrameVector symbolize(const InstructionPointer &IP, + bool UseCanonicalFnName = false, + bool UseProbeDiscriminator = false); /// Decode the interesting parts of the binary and build internal data /// structures. On high level, the parts of interest are: @@ -226,7 +229,7 @@ /// 3. Pseudo probe related sections, used by probe-based profile /// generation. void load(); - const FrameLocationStack &getFrameLocationStack(uint64_t Offset) const { + const SampleContextFrameVector &getFrameLocationStack(uint64_t Offset) const { auto I = Offset2LocStackMap.find(Offset); assert(I != Offset2LocStackMap.end() && "Can't find location for offset in the binary"); @@ -307,7 +310,7 @@ return FuncSizeTracker.getFuncSizeForContext(Context); } - Optional getInlineLeafFrameLoc(uint64_t Offset) { + Optional getInlineLeafFrameLoc(uint64_t Offset) { const auto &Stack = getFrameLocationStack(Offset); if (Stack.empty()) return {}; @@ -317,22 +320,27 @@ // Compare two addresses' inline context bool inlineContextEqual(uint64_t Add1, uint64_t Add2) const; - // Get the context string of the current stack with inline context filled in. + // Get the full context of the current stack with inline context filled in. // It will search the disassembling info stored in Offset2LocStackMap. This is // used as the key of function sample map - std::string getExpandedContextStr(const SmallVectorImpl &Stack, - bool &WasLeafInlined) const; + SampleContextFrameVector + getExpandedContext(const SmallVectorImpl &Stack, + bool &WasLeafInlined) const; const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const { return ProbeDecoder.getCallProbeForAddr(Address); } - void - getInlineContextForProbe(const MCDecodedPseudoProbe *Probe, - SmallVectorImpl &InlineContextStack, - bool IncludeLeaf = false) const { - return ProbeDecoder.getInlineContextForProbe(Probe, InlineContextStack, - IncludeLeaf); + void getInlineContextForProbe(const MCDecodedPseudoProbe *Probe, + SampleContextFrameVector &InlineContextStack, + bool IncludeLeaf = false) const { + SmallVector ProbeInlineContext; + ProbeDecoder.getInlineContextForProbe(Probe, ProbeInlineContext, + IncludeLeaf); + for (auto &Callsite : ProbeInlineContext) { + InlineContextStack.emplace_back(Callsite.first, + LineLocation(Callsite.second, 0)); + } } const AddressProbesMap &getAddress2ProbesMap() const { return ProbeDecoder.getAddress2ProbesMap(); diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -53,12 +53,12 @@ } void BinarySizeContextTracker::addInstructionForContext( - const FrameLocationStack &Context, uint32_t InstrSize) { + const SampleContextFrameVector &Context, uint32_t InstrSize) { ContextTrieNode *CurNode = &RootContext; bool IsLeaf = true; for (const auto &Callsite : reverse(Context)) { - StringRef CallerName = Callsite.first; - LineLocation CallsiteLoc = IsLeaf ? LineLocation(0, 0) : Callsite.second; + StringRef CallerName = Callsite.CallerName; + LineLocation CallsiteLoc = IsLeaf ? LineLocation(0, 0) : Callsite.Callsite; CurNode = CurNode->getOrCreateChildContext(CallsiteLoc, CallerName); IsLeaf = false; } @@ -70,23 +70,20 @@ BinarySizeContextTracker::getFuncSizeForContext(const SampleContext &Context) { ContextTrieNode *CurrNode = &RootContext; ContextTrieNode *PrevNode = nullptr; - StringRef ContextRemain = Context; - StringRef ChildContext; - StringRef CallerName; + SampleContextFrames Frames = Context.getContextFrames(); + int32_t I = Frames.size() - 1; Optional Size; - // Start from top-level context-less function, travese down the reverse + // Start from top-level context-less function, traverse down the reverse // context trie to find the best/longest match for given context, then // retrieve the size. - while (CurrNode && !ContextRemain.empty()) { - // rsplit so we process from leaf function to callers (added to context). - auto ContextSplit = SampleContext::rsplitContextString(ContextRemain); - ChildContext = ContextSplit.second; - ContextRemain = ContextSplit.first; - LineLocation CallSiteLoc(0, 0); - SampleContext::decodeContextString(ChildContext, CallerName, CallSiteLoc); + + while (CurrNode && I >= 0) { + // Process from leaf function to callers (added to context). + const auto &ChildFrame = Frames[I--]; PrevNode = CurrNode; - CurrNode = CurrNode->getChildContext(CallSiteLoc, CallerName); + CurrNode = + CurrNode->getChildContext(ChildFrame.Callsite, ChildFrame.CallerName); if (CurrNode && CurrNode->getFunctionSize().hasValue()) Size = CurrNode->getFunctionSize().getValue(); } @@ -186,8 +183,8 @@ uint64_t Address2) const { uint64_t Offset1 = virtualAddrToOffset(Address1); uint64_t Offset2 = virtualAddrToOffset(Address2); - const FrameLocationStack &Context1 = getFrameLocationStack(Offset1); - const FrameLocationStack &Context2 = getFrameLocationStack(Offset2); + const SampleContextFrameVector &Context1 = getFrameLocationStack(Offset1); + const SampleContextFrameVector &Context2 = getFrameLocationStack(Offset2); if (Context1.size() != Context2.size()) return false; if (Context1.empty()) @@ -198,46 +195,34 @@ Context2.begin(), Context2.begin() + Context2.size() - 1); } -std::string -ProfiledBinary::getExpandedContextStr(const SmallVectorImpl &Stack, - bool &WasLeafInlined) const { - std::string ContextStr; - SmallVector ContextVec; +SampleContextFrameVector +ProfiledBinary::getExpandedContext(const SmallVectorImpl &Stack, + bool &WasLeafInlined) const { + SampleContextFrameVector ContextVec; // Process from frame root to leaf for (auto Address : Stack) { uint64_t Offset = virtualAddrToOffset(Address); - const FrameLocationStack &ExpandedContext = getFrameLocationStack(Offset); + const SampleContextFrameVector &ExpandedContext = + getFrameLocationStack(Offset); // An instruction without a valid debug line will be ignored by sample // processing if (ExpandedContext.empty()) - return std::string(); + return SampleContextFrameVector(); // Set WasLeafInlined to the size of inlined frame count for the last // address which is leaf WasLeafInlined = (ExpandedContext.size() > 1); - for (const auto &Loc : ExpandedContext) { - ContextVec.push_back(getCallSite(Loc)); - } + ContextVec.append(ExpandedContext); } - assert(ContextVec.size() && "Context length should be at least 1"); // Compress the context string except for the leaf frame - std::string LeafFrame = ContextVec.back(); + auto LeafFrame = ContextVec.back(); + LeafFrame.Callsite = LineLocation(0, 0); ContextVec.pop_back(); - CSProfileGenerator::compressRecursionContext(ContextVec); - CSProfileGenerator::trimContext(ContextVec); - - std::ostringstream OContextStr; - for (uint32_t I = 0; I < (uint32_t)ContextVec.size(); I++) { - if (OContextStr.str().size()) { - OContextStr << " @ "; - } - OContextStr << ContextVec[I]; - } - // Only keep the function name for the leaf frame - if (OContextStr.str().size()) - OContextStr << " @ "; - OContextStr << StringRef(LeafFrame).split(":").first.str(); - return OContextStr.str(); + assert(ContextVec.size() && "Context length should be at least 1"); + CSProfileGenerator::compressRecursionContext(ContextVec); + CSProfileGenerator::trimContext(ContextVec); + ContextVec.push_back(LeafFrame); + return ContextVec; } template @@ -363,19 +348,20 @@ // Populate a vector of the symbolized callsite at this location // We don't need symbolized info for probe-based profile, just use an // empty stack as an entry to indicate a valid binary offset - + SampleContextFrameVector SymbolizedCallStack; if (!UsePseudoProbes || TrackFuncContextSize) { InstructionPointer IP(this, Offset); // TODO: reallocation of Offset2LocStackMap will lead to dangling // strings We need ProfiledBinary to owned these string. Offset2LocStackMap[Offset] = symbolize(IP, true, UsePseudoProbes); - FrameLocationStack &SymbolizedCallStack = Offset2LocStackMap[Offset]; + SampleContextFrameVector &SymbolizedCallStack = + Offset2LocStackMap[Offset]; // Record instruction size for the corresponding context if (TrackFuncContextSize && !SymbolizedCallStack.empty()) FuncSizeTracker.addInstructionForContext(Offset2LocStackMap[Offset], Size); } else { - Offset2LocStackMap[Offset] = FrameLocationStack(); + Offset2LocStackMap[Offset] = SampleContextFrameVector(); } // Populate address maps. @@ -519,9 +505,9 @@ Symbolizer = std::make_unique(SymbolizerOpts); } -FrameLocationStack ProfiledBinary::symbolize(const InstructionPointer &IP, - bool UseCanonicalFnName, - bool UseProbeDiscriminator) { +SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP, + bool UseCanonicalFnName, + bool UseProbeDiscriminator) { assert(this == IP.Binary && "Binary should only symbolize its own instruction"); auto Addr = object::SectionedAddress{IP.Offset + getPreferredBaseAddress(), @@ -529,7 +515,7 @@ DIInliningInfo InlineStack = unwrapOrError(Symbolizer->symbolizeInlinedCode(Path, Addr), getName()); - FrameLocationStack CallStack; + SampleContextFrameVector CallStack; for (int32_t I = InlineStack.getNumberOfFrames() - 1; I >= 0; I--) { const auto &CallerFrame = InlineStack.getFrame(I); if (CallerFrame.FunctionName == "") @@ -552,8 +538,8 @@ } LineLocation Line(LineOffset, Discriminator); - FrameLocation Callsite(FunctionName.str(), Line); - CallStack.push_back(Callsite); + auto It = NameStrings.insert(FunctionName.str()); + CallStack.emplace_back(*It.first, Line); } return CallStack; diff --git a/llvm/unittests/ProfileData/SampleProfTest.cpp b/llvm/unittests/ProfileData/SampleProfTest.cpp --- a/llvm/unittests/ProfileData/SampleProfTest.cpp +++ b/llvm/unittests/ProfileData/SampleProfTest.cpp @@ -193,7 +193,7 @@ BooSamples.addHeadSamples(1); BooSamples.addBodySamples(1, 0, 1232); - StringMap Profiles; + SampleProfileMap Profiles; Profiles[FooName] = std::move(FooSamples); Profiles[BarName] = std::move(BarSamples); Profiles[BazName] = std::move(BazSamples); @@ -327,7 +327,7 @@ verifyProfileSummary(Summary, M, true, true); } - void addFunctionSamples(StringMap *Smap, const char *Fname, + void addFunctionSamples(SampleProfileMap *Smap, const char *Fname, uint64_t TotalSamples, uint64_t HeadSamples) { StringRef Name(Fname); FunctionSamples FcnSamples; @@ -338,8 +338,8 @@ (*Smap)[Name] = FcnSamples; } - StringMap setupFcnSamplesForElisionTest(StringRef Policy) { - StringMap Smap; + SampleProfileMap setupFcnSamplesForElisionTest(StringRef Policy) { + SampleProfileMap Smap; addFunctionSamples(&Smap, "foo", uint64_t(20301), uint64_t(1437)); if (Policy == "" || Policy == "all") return Smap; @@ -373,7 +373,7 @@ Module M("my_module", Context); setupModuleForElisionTest(&M, Policy); - StringMap ProfMap = setupFcnSamplesForElisionTest(Policy); + SampleProfileMap ProfMap = setupFcnSamplesForElisionTest(Policy); // write profile createWriter(Format, ProfileFile.path());