diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -142,6 +142,9 @@ // A uuid (uint64_t) identifying the function. It is obtained by // llvm::md5(FunctionName) which returns the lower 64 bits. GlobalValue::GUID Function; + // The symbol name for the function. Only populated in the Frame by the reader + // if requested during initialization. This field should not be serialized. + llvm::Optional SymbolName; // The source line offset of the call from the beginning of parent function. uint32_t LineOffset; // The source column number of the call to help distinguish multiple calls @@ -152,6 +155,7 @@ Frame(const Frame &Other) { Function = Other.Function; + SymbolName = Other.SymbolName; LineOffset = Other.LineOffset; Column = Other.Column; IsInlineFrame = Other.IsInlineFrame; @@ -161,12 +165,15 @@ : Function(Hash), LineOffset(Off), Column(Col), IsInlineFrame(Inline) {} bool operator==(const Frame &Other) const { + // Ignore the SymbolName field to avoid a string compare. Comparing the + // function hash serves the same purpose. return Other.Function == Function && Other.LineOffset == LineOffset && Other.Column == Column && Other.IsInlineFrame == IsInlineFrame; } Frame &operator=(const Frame &Other) { Function = Other.Function; + SymbolName = Other.SymbolName; LineOffset = Other.LineOffset; Column = Other.Column; IsInlineFrame = Other.IsInlineFrame; @@ -214,6 +221,8 @@ void printYAML(raw_ostream &OS) const { OS << " -\n" << " Function: " << Function << "\n" + << " SymbolName: " + << (SymbolName.hasValue() ? SymbolName.getValue() : "") << "\n" << " LineOffset: " << LineOffset << "\n" << " Column: " << Column << "\n" << " Inline: " << IsInlineFrame << "\n"; diff --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h --- a/llvm/include/llvm/ProfileData/RawMemProfReader.h +++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h @@ -57,7 +57,8 @@ // \p Path. The binary from which the profile has been collected is specified // via a path in \p ProfiledBinary. static Expected> - create(const Twine &Path, const StringRef ProfiledBinary); + create(const Twine &Path, const StringRef ProfiledBinary, + bool KeepName = false); using GuidMemProfRecordPair = std::pair; using Iterator = InstrProfIterator; @@ -76,9 +77,9 @@ RawMemProfReader(std::unique_ptr Sym, llvm::SmallVectorImpl &Seg, llvm::MapVector &Prof, - CallStackMap &SM) + CallStackMap &SM, bool KeepName = false) : Symbolizer(std::move(Sym)), SegmentInfo(Seg.begin(), Seg.end()), - CallstackProfileData(Prof), StackMap(SM) { + CallstackProfileData(Prof), StackMap(SM), KeepSymbolName(KeepName) { // We don't call initialize here since there is no raw profile to read. The // test should pass in the raw profile as structured data. @@ -103,8 +104,9 @@ private: RawMemProfReader(std::unique_ptr DataBuffer, - object::OwningBinary &&Bin) - : DataBuffer(std::move(DataBuffer)), Binary(std::move(Bin)) {} + object::OwningBinary &&Bin, bool KeepName) + : DataBuffer(std::move(DataBuffer)), Binary(std::move(Bin)), + KeepSymbolName(KeepName) {} Error initialize(); Error readRawProfile(); // Symbolize and cache all the virtual addresses we encounter in the @@ -146,8 +148,12 @@ llvm::MapVector FunctionProfileData; llvm::MapVector::iterator Iter; -}; + // Whether to keep the symbol name for each frame after hashing. + bool KeepSymbolName = false; + // A mapping of the hash to symbol name, only used if KeepSymbolName is true. + llvm::DenseMap GuidToSymbolName; +}; } // namespace memprof } // namespace llvm diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp --- a/llvm/lib/ProfileData/RawMemProfReader.cpp +++ b/llvm/lib/ProfileData/RawMemProfReader.cpp @@ -12,6 +12,7 @@ #include #include +#include #include #include "llvm/ADT/ArrayRef.h" @@ -174,7 +175,8 @@ } // namespace Expected> -RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary) { +RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary, + bool KeepName) { auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path); if (std::error_code EC = BufferOr.getError()) return report(errorCodeToError(EC), Path.getSingleStringRef()); @@ -193,8 +195,9 @@ return report(BinaryOr.takeError(), ProfiledBinary); } - std::unique_ptr Reader( - new RawMemProfReader(std::move(Buffer), std::move(BinaryOr.get()))); + // Use new here since constructor is private. + std::unique_ptr Reader(new RawMemProfReader( + std::move(Buffer), std::move(BinaryOr.get()), KeepName)); if (Error E = Reader->initialize()) { return std::move(E); } @@ -407,21 +410,21 @@ for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames; I++) { const auto &DIFrame = DI.getFrame(I); - LLVM_DEBUG( - // Print out the name to guid mapping for debugging. - llvm::dbgs() << "FunctionName: " << DIFrame.FunctionName - << " GUID: " - << IndexedMemProfRecord::getGUID(DIFrame.FunctionName) - << "\n";); - - const Frame F(IndexedMemProfRecord::getGUID(DIFrame.FunctionName), - DIFrame.Line - DIFrame.StartLine, DIFrame.Column, + const uint64_t Guid = + IndexedMemProfRecord::getGUID(DIFrame.FunctionName); + const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column, // Only the last entry is not an inlined location. I != NumFrames - 1); - - const FrameId Id = F.hash(); - IdToFrame.insert({Id, F}); - SymbolizedFrame[VAddr].push_back(Id); + // Here we retain a mapping from the GUID to symbol name instead of + // adding it to the frame object directly to reduce memory overhead. + // This is because there can be many unique frames, particularly for + // callsite frames. + if (KeepSymbolName) + GuidToSymbolName.insert({Guid, DIFrame.FunctionName}); + + const FrameId Hash = F.hash(); + IdToFrame.insert({Hash, F}); + SymbolizedFrame[VAddr].push_back(Hash); } } @@ -525,8 +528,15 @@ return make_error(instrprof_error::eof); auto IdToFrameCallback = [this](const FrameId Id) { - return this->idToFrame(Id); + Frame F = this->idToFrame(Id); + if (!this->KeepSymbolName) + return F; + auto Iter = this->GuidToSymbolName.find(F.Function); + assert(Iter != this->GuidToSymbolName.end()); + F.SymbolName = Iter->getSecond(); + return F; }; + const IndexedMemProfRecord &IndexedRecord = Iter->second; GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, IdToFrameCallback)}; Iter++; diff --git a/llvm/test/tools/llvm-profdata/memprof-basic.test b/llvm/test/tools/llvm-profdata/memprof-basic.test --- a/llvm/test/tools/llvm-profdata/memprof-basic.test +++ b/llvm/test/tools/llvm-profdata/memprof-basic.test @@ -52,6 +52,7 @@ CHECK-NEXT: Callstack: CHECK-NEXT: - CHECK-NEXT: Function: {{[0-9]+}} +CHECK-NEXT: SymbolName: main CHECK-NEXT: LineOffset: 1 CHECK-NEXT: Column: 21 CHECK-NEXT: Inline: 0 @@ -79,6 +80,7 @@ CHECK-NEXT: Callstack: CHECK-NEXT: - CHECK-NEXT: Function: {{[0-9]+}} +CHECK-NEXT: SymbolName: main CHECK-NEXT: LineOffset: 5 CHECK-NEXT: Column: 15 CHECK-NEXT: Inline: 0 diff --git a/llvm/test/tools/llvm-profdata/memprof-inline.test b/llvm/test/tools/llvm-profdata/memprof-inline.test --- a/llvm/test/tools/llvm-profdata/memprof-inline.test +++ b/llvm/test/tools/llvm-profdata/memprof-inline.test @@ -35,21 +35,6 @@ env MEMPROF_OPTIONS=log_path=stdout ./memprof-inline.exe > inline.memprofraw ``` -From a debug run we collect the name to guid mappings to ensure that the -ordering we observe is as expected. - -``` -build-dbg/bin/llvm-profdata show --memory --debug-only=memprof \ - inline.memprofraw --profiled-binary memprof-inline.exe - -FunctionName: qux GUID: 15505678318020221912 -FunctionName: foo GUID: 6699318081062747564 -FunctionName: bar GUID: 16434608426314478903 -FunctionName: main GUID: 15822663052811949562 - -[..omit output here which is checked below..] -``` - RUN: llvm-profdata show --memory %p/Inputs/inline.memprofraw --profiled-binary %p/Inputs/inline.memprofexe | FileCheck %s CHECK: MemprofProfile: @@ -68,21 +53,25 @@ CHECK-NEXT: Callstack: CHECK-NEXT: - CHECK-NEXT: Function: 15505678318020221912 +CHECK-NEXT: SymbolName: qux CHECK-NEXT: LineOffset: 1 CHECK-NEXT: Column: 15 CHECK-NEXT: Inline: 1 CHECK-NEXT: - CHECK-NEXT: Function: 6699318081062747564 +CHECK-NEXT: SymbolName: foo CHECK-NEXT: LineOffset: 0 CHECK-NEXT: Column: 18 CHECK-NEXT: Inline: 0 CHECK-NEXT: - CHECK-NEXT: Function: 16434608426314478903 +CHECK-NEXT: SymbolName: bar CHECK-NEXT: LineOffset: 0 CHECK-NEXT: Column: 19 CHECK-NEXT: Inline: 0 CHECK-NEXT: - CHECK-NEXT: Function: 15822663052811949562 +CHECK-NEXT: SymbolName: main CHECK-NEXT: LineOffset: 1 CHECK-NEXT: Column: 3 CHECK-NEXT: Inline: 0 @@ -113,21 +102,25 @@ CHECK-NEXT: Callstack: CHECK-NEXT: - CHECK-NEXT: Function: 15505678318020221912 +CHECK-NEXT: SymbolName: qux CHECK-NEXT: LineOffset: 1 CHECK-NEXT: Column: 15 CHECK-NEXT: Inline: 1 CHECK-NEXT: - CHECK-NEXT: Function: 6699318081062747564 +CHECK-NEXT: SymbolName: foo CHECK-NEXT: LineOffset: 0 CHECK-NEXT: Column: 18 CHECK-NEXT: Inline: 0 CHECK-NEXT: - CHECK-NEXT: Function: 16434608426314478903 +CHECK-NEXT: SymbolName: bar CHECK-NEXT: LineOffset: 0 CHECK-NEXT: Column: 19 CHECK-NEXT: Inline: 0 CHECK-NEXT: - CHECK-NEXT: Function: 15822663052811949562 +CHECK-NEXT: SymbolName: main CHECK-NEXT: LineOffset: 1 CHECK-NEXT: Column: 3 CHECK-NEXT: Inline: 0 @@ -155,12 +148,14 @@ CHECK-NEXT: - CHECK-NEXT: - CHECK-NEXT: Function: 15505678318020221912 +CHECK-NEXT: SymbolName: qux CHECK-NEXT: LineOffset: 1 CHECK-NEXT: Column: 15 CHECK-NEXT: Inline: 1 CHECK-NEXT: - CHECK-NEXT: - CHECK-NEXT: Function: 6699318081062747564 +CHECK-NEXT: SymbolName: foo CHECK-NEXT: LineOffset: 0 CHECK-NEXT: Column: 18 CHECK-NEXT: Inline: 0 @@ -170,6 +165,7 @@ CHECK-NEXT: - CHECK-NEXT: - CHECK-NEXT: Function: 15822663052811949562 +CHECK-NEXT: SymbolName: main CHECK-NEXT: LineOffset: 1 CHECK-NEXT: Column: 3 CHECK-NEXT: Inline: 0 @@ -179,6 +175,7 @@ CHECK-NEXT: - CHECK-NEXT: - CHECK-NEXT: Function: 16434608426314478903 +CHECK-NEXT: SymbolName: bar CHECK-NEXT: LineOffset: 0 CHECK-NEXT: Column: 19 CHECK-NEXT: Inline: 0 diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -2532,8 +2532,8 @@ static int showMemProfProfile(const std::string &Filename, const std::string &ProfiledBinary, raw_fd_ostream &OS) { - auto ReaderOr = - llvm::memprof::RawMemProfReader::create(Filename, ProfiledBinary); + auto ReaderOr = llvm::memprof::RawMemProfReader::create( + Filename, ProfiledBinary, /*KeepNames=*/true); if (Error E = ReaderOr.takeError()) // Since the error can be related to the profile or the binary we do not // pass whence. Instead additional context is provided where necessary in diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -104,6 +104,11 @@ *result_listener << "Hash mismatch"; return false; } + if (F.SymbolName.hasValue() && F.SymbolName.getValue() != FunctionName) { + *result_listener << "SymbolName mismatch\nWant: " << FunctionName + << "\nGot: " << F.SymbolName.getValue(); + return false; + } if (F.LineOffset == LineOffset && F.Column == Column && F.IsInlineFrame == Inline) { return true; @@ -154,7 +159,8 @@ auto Seg = makeSegments(); - RawMemProfReader Reader(std::move(Symbolizer), Seg, Prof, CSM); + RawMemProfReader Reader(std::move(Symbolizer), Seg, Prof, CSM, + /*KeepName=*/true); llvm::DenseMap Records; for (const auto &Pair : Reader) {