diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -15,6 +15,7 @@ #define LLVM_PROFILEDATA_INSTRPROFWRITER_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/MemProf.h" @@ -41,7 +42,7 @@ // A map to hold memprof data per function. The lower 64 bits obtained from // the md5 hash of the function name is used to index into the map. - memprof::FunctionMemProfMap MemProfData; + llvm::MapVector MemProfData; // An enum describing the attributes of the profile. InstrProfKind ProfileKind = InstrProfKind::Unknown; @@ -63,7 +64,7 @@ addRecord(std::move(I), 1, Warn); } - void addRecord(const ::llvm::memprof::MemProfRecord &MR, + void addRecord(const Function::GUID Id, const memprof::MemProfRecord &Record, function_ref Warn); /// Merge existing function counts from the given writer. diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -82,9 +82,9 @@ // Print out the contents of the MemInfoBlock in YAML format. void printYAML(raw_ostream &OS) const { - OS << " MemInfoBlock:\n"; + OS << " MemInfoBlock:\n"; #define MIBEntryDef(NameTag, Name, Type) \ - OS << " " << #Name << ": " << Name << "\n"; + OS << " " << #Name << ": " << Name << "\n"; #include "llvm/ProfileData/MIBEntryDef.inc" #undef MIBEntryDef } @@ -133,6 +133,7 @@ #undef MIBEntryDef }; +// Holds the memprof profile information for a function. struct MemProfRecord { // Describes a call frame for a dynamic allocation context. 
The contents of // the frame are populated by symbolizing the stack depot call frame from the @@ -193,64 +194,123 @@ return sizeof(Frame::Function) + sizeof(Frame::LineOffset) + sizeof(Frame::Column) + sizeof(Frame::IsInlineFrame); } + + // Print the frame information in YAML format. + void printYAML(raw_ostream &OS) const { + OS << " -\n" + << " Function: " << Function << "\n" + << " LineOffset: " << LineOffset << "\n" + << " Column: " << Column << "\n" + << " Inline: " << IsInlineFrame << "\n"; + } + }; + + struct AllocationInfo { + // The dynamic calling context for the allocation. + llvm::SmallVector CallStack; + // The statistics obtained from the runtime for the allocation. + PortableMemInfoBlock Info; + + AllocationInfo() = default; + AllocationInfo(ArrayRef CS, const MemInfoBlock &MB) + : CallStack(CS.begin(), CS.end()), Info(MB) {} + + void printYAML(raw_ostream &OS) const { + OS << " -\n"; + OS << " Callstack:\n"; + // TODO: Print out the frame on one line to make it easier for deep + // callstacks once we have a test to check valid YAML is generated. + for (const auto &Frame : CallStack) + Frame.printYAML(OS); + Info.printYAML(OS); + } + + size_t serializedSize() const { + return sizeof(uint64_t) + // The number of frames to serialize. + Frame::serializedSize() * + CallStack.size() + // The contents of the frames. + PortableMemInfoBlock::serializedSize(); // The size of the payload. + } }; - // The dynamic calling context for the allocation. - llvm::SmallVector CallStack; - // The statistics obtained from the runtime for the allocation. - PortableMemInfoBlock Info; + llvm::SmallVector AllocSites; + llvm::SmallVector> CallSites; void clear() { - CallStack.clear(); - Info.clear(); + AllocSites.clear(); + CallSites.clear(); + } + + void merge(const MemProfRecord &Other) { + // TODO: Filter out duplicates which may occur if multiple memprof profiles + // are merged together using llvm-profdata. 
+ AllocSites.append(Other.AllocSites); + CallSites.append(Other.CallSites); } size_t serializedSize() const { - return sizeof(uint64_t) + // The number of frames to serialize. - Frame::serializedSize() * - CallStack.size() + // The contents of the frames. - PortableMemInfoBlock::serializedSize(); // The size of the payload. + size_t Result = sizeof(Function::GUID); + for (const AllocationInfo &N : AllocSites) + Result += N.serializedSize(); + + // The number of callsites we have information for. + Result += sizeof(uint64_t); + for (const auto &Frames : CallSites) { + // The number of frames to serialize. + Result += sizeof(uint64_t); + for (const Frame &F : Frames) + Result += F.serializedSize(); + } + return Result; } // Prints out the contents of the memprof record in YAML. void print(llvm::raw_ostream &OS) const { - OS << " Callstack:\n"; - // TODO: Print out the frame on one line with to make it easier for deep - // callstacks once we have a test to check valid YAML is generated. - for (const auto &Frame : CallStack) { - OS << " -\n" - << " Function: " << Frame.Function << "\n" - << " LineOffset: " << Frame.LineOffset << "\n" - << " Column: " << Frame.Column << "\n" - << " Inline: " << Frame.IsInlineFrame << "\n"; + if (!AllocSites.empty()) { + OS << " AllocSites:\n"; + for (const AllocationInfo &N : AllocSites) + N.printYAML(OS); } - Info.printYAML(OS); + if (!CallSites.empty()) { + OS << " CallSites:\n"; + for (const auto &Frames : CallSites) { + for (const auto &F : Frames) { + OS << " -\n"; + F.printYAML(OS); + } + } + } } bool operator==(const MemProfRecord &Other) const { - if (Other.Info != Info) + if (Other.AllocSites.size() != AllocSites.size()) return false; - if (Other.CallStack.size() != CallStack.size()) - return false; + for (size_t I = 0; I < AllocSites.size(); I++) { + if (Other.AllocSites[I].Info != AllocSites[I].Info) + return false; - for (size_t I = 0; I < Other.CallStack.size(); I++) { - if (Other.CallStack[I] != CallStack[I]) + if 
(Other.AllocSites[I].CallStack.size() != + AllocSites[I].CallStack.size()) return false; + + for (size_t J = 0; J < Other.AllocSites[I].CallStack.size(); J++) { + if (Other.AllocSites[I].CallStack[J] != AllocSites[I].CallStack[J]) + return false; + } } return true; } -}; -// Serializes the memprof records in \p Records to the ostream \p OS based on -// the schema provided in \p Schema. -void serializeRecords(const ArrayRef Records, - const MemProfSchema &Schema, raw_ostream &OS); + // Serializes the memprof records in \p Records to the ostream \p OS based on + // the schema provided in \p Schema. + void serialize(const MemProfSchema &Schema, raw_ostream &OS); -// Deserializes memprof records from the Buffer -SmallVector deserializeRecords(const MemProfSchema &Schema, - const unsigned char *Buffer); + // Deserializes memprof records from the Buffer. + static MemProfRecord deserialize(const MemProfSchema &Schema, + const unsigned char *Buffer); +}; // Reads a memprof schema from a buffer. All entries in the buffer are // interpreted as uint64_t. The first entry in the buffer denotes the number of @@ -259,14 +319,11 @@ // byte past the schema contents. Expected readMemProfSchema(const unsigned char *&Buffer); -using FunctionMemProfMap = - DenseMap>; - /// Trait for lookups into the on-disk hash table for memprof format in the /// indexed profile. class MemProfRecordLookupTrait { public: - using data_type = ArrayRef; + using data_type = const MemProfRecord &; using internal_key_type = uint64_t; using external_key_type = uint64_t; using hash_value_type = uint64_t; @@ -297,15 +354,15 @@ data_type ReadData(uint64_t K, const unsigned char *D, offset_type /*Unused*/) { - Records = deserializeRecords(Schema, D); - return Records; + Record = MemProfRecord::deserialize(Schema, D); + return Record; } private: // Holds the memprof schema used to deserialize records. MemProfSchema Schema; // Holds the records from one function deserialized from the indexed format. 
- llvm::SmallVector Records; + MemProfRecord Record; }; class MemProfRecordWriterTrait { @@ -313,8 +370,8 @@ using key_type = uint64_t; using key_type_ref = uint64_t; - using data_type = ArrayRef; - using data_type_ref = ArrayRef; + using data_type = MemProfRecord; + using data_type_ref = MemProfRecord &; using hash_value_type = uint64_t; using offset_type = uint64_t; @@ -333,17 +390,9 @@ using namespace support; endian::Writer LE(Out, little); - offset_type N = sizeof(K); LE.write(N); - - offset_type M = 0; - - M += sizeof(uint64_t); - for (const auto &Record : V) { - M += Record.serializedSize(); - } - + offset_type M = V.serializedSize(); LE.write(M); return std::make_pair(N, M); } @@ -357,7 +406,7 @@ void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V, offset_type /*Unused*/) { assert(Schema != nullptr && "MemProf schema is not initialized!"); - serializeRecords(V, *Schema, Out); + V.serialize(*Schema, Out); } }; diff --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h --- a/llvm/include/llvm/ProfileData/RawMemProfReader.h +++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h @@ -14,6 +14,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" @@ -57,15 +58,16 @@ static Expected> create(const Twine &Path, const StringRef ProfiledBinary); - Error readNextRecord(MemProfRecord &Record); - - using Iterator = InstrProfIterator; + using GuidMemProfRecordPair = std::pair; + using Iterator = InstrProfIterator; Iterator end() { return Iterator(); } Iterator begin() { - Iter = ProfileData.begin(); + Iter = FunctionProfileData.begin(); return Iterator(this); } + Error readNextRecord(GuidMemProfRecordPair &GuidRecord); + // The RawMemProfReader only holds memory profile information. 
InstrProfKind getProfileKind() const { return InstrProfKind::MemProf; } @@ -75,7 +77,7 @@ llvm::MapVector &Prof, CallStackMap &SM) : Symbolizer(std::move(Sym)), SegmentInfo(Seg.begin(), Seg.end()), - ProfileData(Prof), StackMap(SM) { + CallstackProfileData(Prof), StackMap(SM) { // We don't call initialize here since there is no raw profile to read. The // test should pass in the raw profile as structured data. @@ -83,6 +85,8 @@ // initialized properly. if (Error E = symbolizeAndFilterStackFrames()) report_fatal_error(std::move(E)); + if (Error E = mapRawProfileToRecords()) + report_fatal_error(std::move(E)); } private: @@ -96,10 +100,12 @@ // symbolize or those that belong to the runtime. For profile entries where // the entire callstack is pruned, we drop the entry from the profile. Error symbolizeAndFilterStackFrames(); + // Construct memprof records for each function and store them in the + // `FunctionProfileData` map. A function may have allocation profile data or + // callsite data or both. + Error mapRawProfileToRecords(); object::SectionedAddress getModuleOffset(uint64_t VirtualAddress); - Error fillRecord(const uint64_t Id, const MemInfoBlock &MIB, - MemProfRecord &Record); // Prints aggregate counts for each raw profile parsed from the DataBuffer in // YAML format. void printSummaries(raw_ostream &OS) const; @@ -112,15 +118,15 @@ llvm::SmallVector SegmentInfo; // A map from callstack id (same as key in CallStackMap below) to the heap // information recorded for that allocation context. - llvm::MapVector ProfileData; + llvm::MapVector CallstackProfileData; CallStackMap StackMap; // Cached symbolization from PC to Frame. llvm::DenseMap> SymbolizedFrame; - // Iterator to read from the ProfileData MapVector. 
- llvm::MapVector::iterator Iter = ProfileData.end(); + llvm::MapVector FunctionProfileData; + llvm::MapVector::iterator Iter; }; } // namespace memprof diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -253,28 +253,14 @@ Dest.sortValueData(); } -void InstrProfWriter::addRecord(const memprof::MemProfRecord &MR, +void InstrProfWriter::addRecord(const Function::GUID Id, + const memprof::MemProfRecord &Record, function_ref Warn) { - // Use 0 as a sentinel value since its highly unlikely that the lower 64-bits - // of a 128 bit md5 hash will be all zeros. - // TODO: Move this Key frame detection to the contructor to avoid having to - // scan all the callstacks again when adding a new record. - uint64_t Key = 0; - for (auto Iter = MR.CallStack.rbegin(), End = MR.CallStack.rend(); - Iter != End; Iter++) { - if (!Iter->IsInlineFrame) { - Key = Iter->Function; - break; - } - } - - if (Key == 0) { - Warn(make_error( - instrprof_error::invalid_prof, - "could not determine leaf function for memprof record.")); + auto Result = MemProfData.insert({Id, Record}); + if (!Result.second) { + memprof::MemProfRecord &Existing = Result.first->second; + Existing.merge(Record); } - - MemProfData[Key].push_back(MR); } void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW, @@ -283,9 +269,9 @@ for (auto &Func : I.getValue()) addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn); - for (auto &I : IPW.MemProfData) - for (const auto &MR : I.second) - addRecord(MR, Warn); + for (auto &I : IPW.MemProfData) { + addRecord(I.first, I.second, Warn); + } } bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) { @@ -415,8 +401,8 @@ MemProfWriter->Schema = &Schema; OnDiskChainedHashTableGenerator MemProfGenerator; - for (const auto &I : MemProfData) { - // Insert the key (func hash) and value (vector of memprof records). 
+ for (auto &I : MemProfData) { + // Insert the key (func hash) and value (memprof record). MemProfGenerator.insert(I.first, I.second); } diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp --- a/llvm/lib/ProfileData/MemProf.cpp +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -1,4 +1,5 @@ #include "llvm/ProfileData/MemProf.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" @@ -6,43 +7,65 @@ namespace llvm { namespace memprof { -void serializeRecords(const ArrayRef Records, - const MemProfSchema &Schema, raw_ostream &OS) { +void MemProfRecord::serialize(const MemProfSchema &Schema, raw_ostream &OS) { using namespace support; endian::Writer LE(OS, little); - LE.write(Records.size()); - for (const MemProfRecord &MR : Records) { - LE.write(MR.CallStack.size()); - for (const MemProfRecord::Frame &F : MR.CallStack) { + LE.write(AllocSites.size()); + for (const AllocationInfo &N : AllocSites) { + LE.write(N.CallStack.size()); + for (const Frame &F : N.CallStack) + F.serialize(OS); + N.Info.serialize(Schema, OS); + } + + // Related contexts. + LE.write(CallSites.size()); + for (const auto &Frames : CallSites) { + LE.write(Frames.size()); + for (const Frame &F : Frames) F.serialize(OS); - } - MR.Info.serialize(Schema, OS); } } -SmallVector deserializeRecords(const MemProfSchema &Schema, - const unsigned char *Ptr) { +MemProfRecord MemProfRecord::deserialize(const MemProfSchema &Schema, + const unsigned char *Ptr) { using namespace support; - SmallVector Records; - const uint64_t NumRecords = - endian::readNext(Ptr); - for (uint64_t I = 0; I < NumRecords; I++) { - MemProfRecord MR; + MemProfRecord Record; + + // Read the meminfo nodes. 
+ const uint64_t NumNodes = endian::readNext(Ptr); + for (uint64_t I = 0; I < NumNodes; I++) { + MemProfRecord::AllocationInfo Node; const uint64_t NumFrames = endian::readNext(Ptr); for (uint64_t J = 0; J < NumFrames; J++) { const auto F = MemProfRecord::Frame::deserialize(Ptr); Ptr += MemProfRecord::Frame::serializedSize(); - MR.CallStack.push_back(F); + Node.CallStack.push_back(F); } - MR.Info.deserialize(Schema, Ptr); + Node.Info.deserialize(Schema, Ptr); Ptr += PortableMemInfoBlock::serializedSize(); - Records.push_back(MR); + Record.AllocSites.push_back(Node); + } + + // Read the callsite information. + const uint64_t NumCtxs = endian::readNext(Ptr); + for (uint64_t J = 0; J < NumCtxs; J++) { + const uint64_t NumFrames = + endian::readNext(Ptr); + llvm::SmallVector Frames; + for (uint64_t K = 0; K < NumFrames; K++) { + const auto F = MemProfRecord::Frame::deserialize(Ptr); + Ptr += MemProfRecord::Frame::serializedSize(); + Frames.push_back(F); + } + Record.CallSites.push_back(Frames); } - return Records; + + return Record; } Expected readMemProfSchema(const unsigned char *&Buffer) { diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp --- a/llvm/lib/ProfileData/RawMemProfReader.cpp +++ b/llvm/lib/ProfileData/RawMemProfReader.cpp @@ -233,9 +233,10 @@ printSummaries(OS); // Print out the merged contents of the profiles. OS << " Records:\n"; - for (const auto &Record : *this) { + for (const auto &Entry : *this) { OS << " -\n"; - Record.print(OS); + OS << " FunctionGUID: " << Entry.first << "\n"; + Entry.second.print(OS); } } @@ -288,7 +289,73 @@ if (Error E = readRawProfile()) return E; - return symbolizeAndFilterStackFrames(); + if (Error E = symbolizeAndFilterStackFrames()) + return E; + + return mapRawProfileToRecords(); +} + +Error RawMemProfReader::mapRawProfileToRecords() { + // Convert the raw profile callstack data into memprof records. 
While doing so + // keep track of related contexts so that we can fill these in later. + llvm::DenseMap> + PerFunctionCallSites; + + for (const auto &Entry : CallstackProfileData) { + const uint64_t StackId = Entry.first; + + auto It = StackMap.find(StackId); + if (It == StackMap.end()) + return make_error( + instrprof_error::malformed, + "memprof callstack record does not contain id: " + Twine(StackId)); + + // Construct the symbolized callstack. + llvm::SmallVector Callstack; + Callstack.reserve(It->getSecond().size()); + + Function::GUID LeafFunction = 0; + for (const uint64_t Address : It->getSecond()) { + assert(SymbolizedFrame.count(Address) > 0 && + "Address not found in SymbolizedFrame map"); + const SmallVector &Frames = + SymbolizedFrame[Address]; + + // The leaf function is the first PC in the callstack so check if unset. + if (LeafFunction == 0) { + LeafFunction = Frames.front().Function; + assert(!Frames.front().IsInlineFrame && + "The first frame should not be inlined"); + } else { + // Record the function this address belongs to. We skip call sites in + // the leaf since that call is to the allocator itself. + PerFunctionCallSites[Frames.front().Function].insert(Address); + } + // Add all the frames to the current allocation callstack. + Callstack.append(Frames.begin(), Frames.end()); + } + + auto Result = FunctionProfileData.insert({LeafFunction, MemProfRecord()}); + MemProfRecord &Record = Result.first->second; + Record.AllocSites.emplace_back(Callstack, Entry.second); + } + + // Fill in the related contexts per function profile. + for (auto I = PerFunctionCallSites.begin(), E = PerFunctionCallSites.end(); + I != E; I++) { + const Function::GUID Id = I->first; + // Some functions may have only callsite data and no allocation data. Here + // we insert a new entry for callsite data if we need to. 
+ auto Result = FunctionProfileData.insert({Id, MemProfRecord()}); + MemProfRecord &Record = Result.first->second; + for (const uint64_t Address : I->getSecond()) { + const SmallVector &Frames = + SymbolizedFrame[Address]; + Record.CallSites.push_back(Frames); + } + } + + return Error::success(); } Error RawMemProfReader::symbolizeAndFilterStackFrames() { @@ -353,7 +420,7 @@ // Drop the entries where the callstack is empty. for (const uint64_t Id : EntriesToErase) { StackMap.erase(Id); - ProfileData.erase(Id); + CallstackProfileData.erase(Id); } if (StackMap.empty()) @@ -388,10 +455,10 @@ // raw profiles in the same binary file are from the same process so the // stackdepot ids are the same. for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) { - if (ProfileData.count(Value.first)) { - ProfileData[Value.first].Merge(Value.second); + if (CallstackProfileData.count(Value.first)) { + CallstackProfileData[Value.first].Merge(Value.second); } else { - ProfileData[Value.first] = Value.second; + CallstackProfileData[Value.first] = Value.second; } } @@ -432,29 +499,14 @@ return object::SectionedAddress{VirtualAddress}; } -Error RawMemProfReader::fillRecord(const uint64_t Id, const MemInfoBlock &MIB, - MemProfRecord &Record) { - auto &CallStack = StackMap[Id]; - for (const uint64_t Address : CallStack) { - assert(SymbolizedFrame.count(Address) && - "Address not found in symbolized frame cache."); - Record.CallStack.append(SymbolizedFrame[Address]); - } - Record.Info = PortableMemInfoBlock(MIB); - return Error::success(); -} - -Error RawMemProfReader::readNextRecord(MemProfRecord &Record) { - if (ProfileData.empty()) +Error RawMemProfReader::readNextRecord(GuidMemProfRecordPair &GuidRecord) { + if (FunctionProfileData.empty()) return make_error(instrprof_error::empty_raw_profile); - if (Iter == ProfileData.end()) + if (Iter == FunctionProfileData.end()) return make_error(instrprof_error::eof); - Record.clear(); - if (Error E = fillRecord(Iter->first, 
Iter->second, Record)) { - return E; - } + GuidRecord = {Iter->first, Iter->second}; Iter++; return Error::success(); } diff --git a/llvm/test/tools/llvm-profdata/memprof-basic.test b/llvm/test/tools/llvm-profdata/memprof-basic.test --- a/llvm/test/tools/llvm-profdata/memprof-basic.test +++ b/llvm/test/tools/llvm-profdata/memprof-basic.test @@ -46,56 +46,59 @@ CHECK-NEXT: NumStackOffsets: 3 CHECK-NEXT: Records: CHECK-NEXT: - -CHECK-NEXT: Callstack: +CHECK-NEXT: FunctionGUID: {{[0-9]+}} +CHECK-NEXT: AllocSites: CHECK-NEXT: - -CHECK-NEXT: Function: {{[0-9]+}} -CHECK-NEXT: LineOffset: 1 -CHECK-NEXT: Column: 21 -CHECK-NEXT: Inline: 0 -CHECK-NEXT: MemInfoBlock: -CHECK-NEXT: AllocCount: 1 -CHECK-NEXT: TotalAccessCount: 2 -CHECK-NEXT: MinAccessCount: 2 -CHECK-NEXT: MaxAccessCount: 2 -CHECK-NEXT: TotalSize: 10 -CHECK-NEXT: MinSize: 10 -CHECK-NEXT: MaxSize: 10 -CHECK-NEXT: AllocTimestamp: 986 -CHECK-NEXT: DeallocTimestamp: 986 -CHECK-NEXT: TotalLifetime: 0 -CHECK-NEXT: MinLifetime: 0 -CHECK-NEXT: MaxLifetime: 0 -CHECK-NEXT: AllocCpuId: 56 -CHECK-NEXT: DeallocCpuId: 56 -CHECK-NEXT: NumMigratedCpu: 0 -CHECK-NEXT: NumLifetimeOverlaps: 0 -CHECK-NEXT: NumSameAllocCpu: 0 -CHECK-NEXT: NumSameDeallocCpu: 0 -CHECK-NEXT: DataTypeId: {{[0-9]+}} -CHECK-NEXT: - -CHECK-NEXT: Callstack: +CHECK-NEXT: Callstack: +CHECK-NEXT: - +CHECK-NEXT: Function: {{[0-9]+}} +CHECK-NEXT: LineOffset: 1 +CHECK-NEXT: Column: 21 +CHECK-NEXT: Inline: 0 +CHECK-NEXT: MemInfoBlock: +CHECK-NEXT: AllocCount: 1 +CHECK-NEXT: TotalAccessCount: 2 +CHECK-NEXT: MinAccessCount: 2 +CHECK-NEXT: MaxAccessCount: 2 +CHECK-NEXT: TotalSize: 10 +CHECK-NEXT: MinSize: 10 +CHECK-NEXT: MaxSize: 10 +CHECK-NEXT: AllocTimestamp: 986 +CHECK-NEXT: DeallocTimestamp: 986 +CHECK-NEXT: TotalLifetime: 0 +CHECK-NEXT: MinLifetime: 0 +CHECK-NEXT: MaxLifetime: 0 +CHECK-NEXT: AllocCpuId: 56 +CHECK-NEXT: DeallocCpuId: 56 +CHECK-NEXT: NumMigratedCpu: 0 +CHECK-NEXT: NumLifetimeOverlaps: 0 +CHECK-NEXT: NumSameAllocCpu: 0 +CHECK-NEXT: NumSameDeallocCpu: 
0 +CHECK-NEXT: DataTypeId: {{[0-9]+}} CHECK-NEXT: - -CHECK-NEXT: Function: {{[0-9]+}} -CHECK-NEXT: LineOffset: 5 -CHECK-NEXT: Column: 15 -CHECK-NEXT: Inline: 0 -CHECK-NEXT: MemInfoBlock: -CHECK-NEXT: AllocCount: 1 -CHECK-NEXT: TotalAccessCount: 2 -CHECK-NEXT: MinAccessCount: 2 -CHECK-NEXT: MaxAccessCount: 2 -CHECK-NEXT: TotalSize: 10 -CHECK-NEXT: MinSize: 10 -CHECK-NEXT: MaxSize: 10 -CHECK-NEXT: AllocTimestamp: 987 -CHECK-NEXT: DeallocTimestamp: 987 -CHECK-NEXT: TotalLifetime: 0 -CHECK-NEXT: MinLifetime: 0 -CHECK-NEXT: MaxLifetime: 0 -CHECK-NEXT: AllocCpuId: 56 -CHECK-NEXT: DeallocCpuId: 56 -CHECK-NEXT: NumMigratedCpu: 0 -CHECK-NEXT: NumLifetimeOverlaps: 0 -CHECK-NEXT: NumSameAllocCpu: 0 -CHECK-NEXT: NumSameDeallocCpu: 0 -CHECK-NEXT: DataTypeId: {{[0-9]+}} +CHECK-NEXT: Callstack: +CHECK-NEXT: - +CHECK-NEXT: Function: {{[0-9]+}} +CHECK-NEXT: LineOffset: 5 +CHECK-NEXT: Column: 15 +CHECK-NEXT: Inline: 0 +CHECK-NEXT: MemInfoBlock: +CHECK-NEXT: AllocCount: 1 +CHECK-NEXT: TotalAccessCount: 2 +CHECK-NEXT: MinAccessCount: 2 +CHECK-NEXT: MaxAccessCount: 2 +CHECK-NEXT: TotalSize: 10 +CHECK-NEXT: MinSize: 10 +CHECK-NEXT: MaxSize: 10 +CHECK-NEXT: AllocTimestamp: 987 +CHECK-NEXT: DeallocTimestamp: 987 +CHECK-NEXT: TotalLifetime: 0 +CHECK-NEXT: MinLifetime: 0 +CHECK-NEXT: MaxLifetime: 0 +CHECK-NEXT: AllocCpuId: 56 +CHECK-NEXT: DeallocCpuId: 56 +CHECK-NEXT: NumMigratedCpu: 0 +CHECK-NEXT: NumLifetimeOverlaps: 0 +CHECK-NEXT: NumSameAllocCpu: 0 +CHECK-NEXT: NumSameDeallocCpu: 0 +CHECK-NEXT: DataTypeId: {{[0-9]+}} diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -267,8 +267,8 @@ } // Add the records into the writer context. 
- for (const memprof::MemProfRecord &MR : *Reader) { - WC->Writer.addRecord(MR, [&](Error E) { + for (auto I = Reader->begin(), E = Reader->end(); I != E; ++I) { + WC->Writer.addRecord(/*Id=*/I->first, /*Record=*/I->second, [&](Error E) { instrprof_error IPE = InstrProfError::take(std::move(E)); WC->Errors.emplace_back(make_error(IPE), Filename); }); diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -13,6 +13,7 @@ #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/InstrProfWriter.h" #include "llvm/ProfileData/MemProf.h" +#include "llvm/ProfileData/MemProfData.inc" #include "llvm/Support/Compression.h" #include "llvm/Testing/Support/Error.h" #include "llvm/Testing/Support/SupportHelpers.h" @@ -222,13 +223,22 @@ ASSERT_EQ(0U, R->Counts[1]); } +using ::llvm::memprof::MemInfoBlock; +using ::llvm::memprof::MemProfRecord; +MemProfRecord makeRecord(std::initializer_list Frames, + const MemInfoBlock &Block = MemInfoBlock()) { + llvm::memprof::MemProfRecord MR; + MR.AllocSites.emplace_back(Frames, Block); + return MR; +} + TEST_F(InstrProfTest, test_memprof) { ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf), Succeeded()); - llvm::memprof::MemProfRecord MR; - MR.CallStack.push_back({0x123, 1, 2, false}); - MR.CallStack.push_back({0x345, 3, 4, true}); - Writer.addRecord(MR, Err); + + const MemProfRecord MR = + makeRecord({{0x123, 1, 2, false}, {0x345, 3, 4, true}}); + Writer.addRecord(0x123, MR, Err); auto Profile = Writer.writeBuffer(); readProfile(std::move(Profile)); @@ -247,10 +257,9 @@ ASSERT_THAT_ERROR(Writer2.mergeProfileKind(InstrProfKind::MemProf), Succeeded()); - llvm::memprof::MemProfRecord MR; - MR.CallStack.push_back({0x123, 1, 2, false}); - MR.CallStack.push_back({0x345, 3, 4, true}); - Writer2.addRecord(MR, Err); + const MemProfRecord MR = + makeRecord({{0x123, 1, 
2, false}, {0x345, 3, 4, true}}); + Writer2.addRecord(0x123, MR, Err); ASSERT_THAT_ERROR(Writer.mergeProfileKind(Writer2.getProfileKind()), Succeeded()); @@ -271,17 +280,17 @@ EXPECT_EQ(Records[0], MR); } -TEST_F(InstrProfTest, test_memprof_invalid_add_record) { - llvm::memprof::MemProfRecord MR; - // At least one of the frames should be a non-inline frame. - MR.CallStack.push_back({0x123, 1, 2, true}); - MR.CallStack.push_back({0x345, 3, 4, true}); - - auto CheckErr = [](Error &&E) { - EXPECT_TRUE(ErrorEquals(instrprof_error::invalid_prof, std::move(E))); - }; - Writer.addRecord(MR, CheckErr); -} +// TEST_F(InstrProfTest, test_memprof_invalid_add_record) { +// llvm::memprof::MemProfRecord MR; +// // At least one of the frames should be a non-inline frame. +// MR.CallStack.push_back({0x123, 1, 2, true}); +// MR.CallStack.push_back({0x345, 3, 4, true}); +// +// auto CheckErr = [](Error &&E) { +// EXPECT_TRUE(ErrorEquals(instrprof_error::invalid_prof, std::move(E))); +// }; +// Writer.addRecord(MR, CheckErr); +// } static const char callee1[] = "callee1"; static const char callee2[] = "callee2"; diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -140,6 +140,14 @@ {"bar", 201, 150, 20}, }))); + EXPECT_CALL(*Symbolizer, symbolizeInlinedCode(SectionedAddress{0x4000}, + specifier(), false)) + .Times(1) + .WillRepeatedly(Return(makeInliningInfo({ + {"xyz", 10, 5, 30}, + {"abc", 10, 5, 30}, + }))); + EXPECT_CALL(*Symbolizer, symbolizeInlinedCode(SectionedAddress{0x6000}, specifier(), false)) .Times(1) @@ -149,7 +157,7 @@ }))); CallStackMap CSM; - CSM[0x1] = {0x2000}; + CSM[0x1] = {0x2000, 0x4000}; CSM[0x2] = {0x6000, 0x2000}; llvm::MapVector Prof; @@ -160,21 +168,42 @@ RawMemProfReader Reader(std::move(Symbolizer), Seg, Prof, CSM); - std::vector Records; - for (const MemProfRecord &R : Reader) { - Records.push_back(R); + 
llvm::SmallVector Records; + for (const auto &KeyRecordPair : Reader) { + Records.push_back(KeyRecordPair.second); } - EXPECT_EQ(Records.size(), 2U); - - EXPECT_EQ(Records[0].Info.getAllocCount(), 1U); - EXPECT_EQ(Records[1].Info.getAllocCount(), 2U); - EXPECT_THAT(Records[0].CallStack[0], FrameContains("foo", 5U, 30U, false)); - EXPECT_THAT(Records[0].CallStack[1], FrameContains("bar", 51U, 20U, true)); - - EXPECT_THAT(Records[1].CallStack[0], FrameContains("baz", 5U, 30U, false)); - EXPECT_THAT(Records[1].CallStack[1], FrameContains("qux", 5U, 10U, true)); - EXPECT_THAT(Records[1].CallStack[2], FrameContains("foo", 5U, 30U, false)); - EXPECT_THAT(Records[1].CallStack[3], FrameContains("bar", 51U, 20U, true)); + // The PCs in each callstack have the leaf function at index zero. Since + // memprof records are grouped by leaf function, here we expect 2 records for + // functions with allocation contexts. 1 additional record with only callsite + // data is expected for the first profile entry. + ASSERT_EQ(Records.size(), 3U); + ASSERT_EQ(Records[0].AllocSites.size(), 1U); + ASSERT_EQ(Records[1].AllocSites.size(), 1U); + + EXPECT_EQ(Records[0].AllocSites[0].Info.getAllocCount(), 1U); + EXPECT_EQ(Records[1].AllocSites[0].Info.getAllocCount(), 2U); + + EXPECT_THAT(Records[0].AllocSites[0].CallStack[0], + FrameContains("foo", 5U, 30U, false)); + EXPECT_THAT(Records[0].AllocSites[0].CallStack[1], + FrameContains("bar", 51U, 20U, true)); + + EXPECT_THAT(Records[1].AllocSites[0].CallStack[0], + FrameContains("baz", 5U, 30U, false)); + EXPECT_THAT(Records[1].AllocSites[0].CallStack[1], + FrameContains("qux", 5U, 10U, true)); + EXPECT_THAT(Records[1].AllocSites[0].CallStack[2], + FrameContains("foo", 5U, 30U, false)); + EXPECT_THAT(Records[1].AllocSites[0].CallStack[3], + FrameContains("bar", 51U, 20U, true)); + + // Check the callsite frames. 
+ EXPECT_TRUE(Records[2].AllocSites.empty()); + ASSERT_EQ(Records[2].CallSites.size(), 1U); + // We expect one actual frame and one inline frame. + ASSERT_EQ(Records[2].CallSites[0].size(), 2U); + EXPECT_THAT(Records[2].CallSites[0][0], FrameContains("xyz", 5U, 30U, false)); + EXPECT_THAT(Records[2].CallSites[0][1], FrameContains("abc", 5U, 30U, true)); } TEST(MemProf, PortableWrapper) { @@ -205,36 +234,33 @@ TEST(MemProf, RecordSerializationRoundTrip) { const MemProfSchema Schema = getFullSchema(); - llvm::SmallVector Records; - MemProfRecord MR; - MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000, /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3, /*dealloc_cpu=*/4); - MR.Info = PortableMemInfoBlock(Info); - MR.CallStack.push_back({0x123, 1, 2, false}); - MR.CallStack.push_back({0x345, 3, 4, false}); - Records.push_back(MR); + llvm::SmallVector> AllocCallStacks = { + {{0x123, 1, 2, false}, {0x345, 3, 4, false}}, + {{0x123, 1, 2, false}, {0x567, 5, 6, false}}}; - MR.clear(); - MR.Info = PortableMemInfoBlock(Info); - MR.CallStack.push_back({0x567, 5, 6, false}); - MR.CallStack.push_back({0x789, 7, 8, false}); - Records.push_back(MR); + llvm::SmallVector> CallSites = { + {{0x333, 1, 2, false}, {0x777, 3, 4, true}}}; + + MemProfRecord Record; + for (const auto &ACS : AllocCallStacks) { + // Use the same info block for both allocation sites. 
+ Record.AllocSites.emplace_back(ACS, Info); + } + Record.CallSites.assign(CallSites); std::string Buffer; llvm::raw_string_ostream OS(Buffer); - serializeRecords(Records, Schema, OS); + Record.serialize(Schema, OS); OS.flush(); - const llvm::SmallVector GotRecords = deserializeRecords( + const MemProfRecord GotRecord = MemProfRecord::deserialize( Schema, reinterpret_cast(Buffer.data())); - ASSERT_TRUE(!GotRecords.empty()); - EXPECT_EQ(GotRecords.size(), Records.size()); - EXPECT_THAT(GotRecords[0], EqualsRecord(Records[0])); - EXPECT_THAT(GotRecords[1], EqualsRecord(Records[1])); + EXPECT_THAT(GotRecord, EqualsRecord(Record)); } TEST(MemProf, SymbolizationFilter) { @@ -282,12 +308,15 @@ RawMemProfReader Reader(std::move(Symbolizer), Seg, Prof, CSM); - std::vector Records; - for (const MemProfRecord &R : Reader) { - Records.push_back(R); + llvm::SmallVector Records; + for (const auto &KeyRecordPair : Reader) { + Records.push_back(KeyRecordPair.second); } + ASSERT_EQ(Records.size(), 1U); - ASSERT_EQ(Records[0].CallStack.size(), 1U); - EXPECT_THAT(Records[0].CallStack[0], FrameContains("foo", 5U, 30U, false)); + ASSERT_EQ(Records[0].AllocSites.size(), 1U); + ASSERT_EQ(Records[0].AllocSites[0].CallStack.size(), 1U); + EXPECT_THAT(Records[0].AllocSites[0].CallStack[0], + FrameContains("foo", 5U, 30U, false)); } } // namespace