diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -196,7 +196,7 @@ }; // The dynamic calling context for the allocation. - std::vector CallStack; + llvm::SmallVector CallStack; // The statistics obtained from the runtime for the allocation. PortableMemInfoBlock Info; diff --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h --- a/llvm/include/llvm/ProfileData/RawMemProfReader.h +++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h @@ -32,7 +32,7 @@ // Map from id (recorded from sanitizer stack depot) to virtual addresses for // each program counter address in the callstack. -using CallStackMap = llvm::DenseMap>; +using CallStackMap = llvm::DenseMap>; class RawMemProfReader { public: @@ -75,7 +75,15 @@ llvm::MapVector &Prof, CallStackMap &SM) : Symbolizer(std::move(Sym)), SegmentInfo(Seg.begin(), Seg.end()), - ProfileData(Prof), StackMap(SM) {} + ProfileData(Prof), StackMap(SM) { + // We don't call initialize here since there is no raw profile to read. The + // test should pass in the raw profile as structured data. + + // If there is an error here then the mock symbolizer has not been + // initialized properly. + if (Error E = symbolizeStackFrames()) + report_fatal_error(std::move(E)); + } private: RawMemProfReader(std::unique_ptr DataBuffer, @@ -83,6 +91,7 @@ : DataBuffer(std::move(DataBuffer)), Binary(std::move(Bin)) {} Error initialize(); Error readRawProfile(); + Error symbolizeStackFrames(); object::SectionedAddress getModuleOffset(uint64_t VirtualAddress); Error fillRecord(const uint64_t Id, const MemInfoBlock &MIB, @@ -102,6 +111,10 @@ llvm::MapVector ProfileData; CallStackMap StackMap; + // Cached symbolization from PC to Frame. + llvm::DenseMap> + SymbolizedFrame; + // Iterator to read from the ProfileData MapVector. llvm::MapVector::iterator Iter = ProfileData.end(); }; diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp --- a/llvm/lib/ProfileData/RawMemProfReader.cpp +++ b/llvm/lib/ProfileData/RawMemProfReader.cpp @@ -132,7 +132,7 @@ const uint64_t StackId = endian::readNext(Ptr); const uint64_t NumPCs = endian::readNext(Ptr); - SmallVector CallStack; + SmallVector CallStack; for (uint64_t J = 0; J < NumPCs; J++) { CallStack.push_back(endian::readNext(Ptr)); } @@ -273,7 +273,46 @@ return report(SOFOr.takeError(), FileName); Symbolizer = std::move(SOFOr.get()); - return readRawProfile(); + if (Error E = readRawProfile()) + return E; + + return symbolizeStackFrames(); +} + +Error RawMemProfReader::symbolizeStackFrames() { + // The specifier to use when symbolization is requested. + const DILineInfoSpecifier Specifier( + DILineInfoSpecifier::FileLineInfoKind::RawValue, + DILineInfoSpecifier::FunctionNameKind::LinkageName); + + for (const auto &Entry : StackMap) { + for (const uint64_t VAddr : Entry.getSecond()) { + // Check if we have already symbolized and cached the result. + if (SymbolizedFrame.count(VAddr) > 0) + continue; + + Expected DIOr = Symbolizer->symbolizeInlinedCode( + getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false); + if (!DIOr) + return DIOr.takeError(); + DIInliningInfo DI = DIOr.get(); + + for (size_t I = 0; I < DI.getNumberOfFrames(); I++) { + const auto &Frame = DI.getFrame(I); + SymbolizedFrame[VAddr].emplace_back( + // We use the function guid which we expect to be a uint64_t. At + // this time, it is the lower 64 bits of the md5 of the function + // name. Any suffix with .llvm. is trimmed since these are added by + // thinLTO global promotion. At the time the profile is consumed, + // these suffixes will not be present. + Function::getGUID(trimSuffix(Frame.FunctionName)), + Frame.Line - Frame.StartLine, Frame.Column, + // Only the first entry is not an inlined location. + I != 0); + } + } + } + return Error::success(); } Error RawMemProfReader::readRawProfile() { @@ -347,30 +386,10 @@ Error RawMemProfReader::fillRecord(const uint64_t Id, const MemInfoBlock &MIB, MemProfRecord &Record) { auto &CallStack = StackMap[Id]; - DILineInfoSpecifier Specifier( - DILineInfoSpecifier::FileLineInfoKind::RawValue, - DILineInfoSpecifier::FunctionNameKind::LinkageName); for (const uint64_t Address : CallStack) { - Expected DIOr = Symbolizer->symbolizeInlinedCode( - getModuleOffset(Address), Specifier, /*UseSymbolTable=*/false); - - if (!DIOr) - return DIOr.takeError(); - DIInliningInfo DI = DIOr.get(); - - for (size_t I = 0; I < DI.getNumberOfFrames(); I++) { - const auto &Frame = DI.getFrame(I); - Record.CallStack.emplace_back( - // We use the function guid which we expect to be a uint64_t. At this - // time, it is the lower 64 bits of the md5 of the function name. Any - // suffix with .llvm. is trimmed since these are added by thinLTO - // global promotion. At the time the profile is consumed, these - // suffixes will not be present. - Function::getGUID(trimSuffix(Frame.FunctionName)), - Frame.Line - Frame.StartLine, Frame.Column, - // Only the first entry is not an inlined location. - I != 0); - } + assert(SymbolizedFrame.count(Address) && + "Address not found in symbolized frame cache."); + Record.CallStack.append(SymbolizedFrame[Address]); } Record.Info = PortableMemInfoBlock(MIB); return Error::success(); diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -132,7 +132,7 @@ EXPECT_CALL(*Symbolizer, symbolizeInlinedCode(SectionedAddress{0x2000}, specifier(), false)) - .Times(2) + .Times(1) // Only once since we cache the result for future lookups. .WillRepeatedly(Return(makeInliningInfo({ {"foo", 10, 5, 30}, {"bar", 201, 150, 20},