diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -15,7 +15,9 @@ #define LLVM_PROFILEDATA_INSTRPROFWRITER_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringMap.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/MemProf.h" #include "llvm/Support/Endian.h" @@ -41,7 +43,7 @@ // A map to hold memprof data per function. The lower 64 bits obtained from // the md5 hash of the function name is used to index into the map. - memprof::FunctionMemProfMap MemProfData; + llvm::MapVector MemProfData; // An enum describing the attributes of the profile. InstrProfKind ProfileKind = InstrProfKind::Unknown; @@ -63,7 +65,8 @@ addRecord(std::move(I), 1, Warn); } - void addRecord(const ::llvm::memprof::MemProfRecord &MR, + void addRecord(const GlobalValue::GUID Id, + const memprof::MemProfRecord &Record, function_ref Warn); /// Merge existing function counts from the given writer. diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -82,9 +82,9 @@ // Print out the contents of the MemInfoBlock in YAML format. void printYAML(raw_ostream &OS) const { - OS << " MemInfoBlock:\n"; + OS << " MemInfoBlock:\n"; #define MIBEntryDef(NameTag, Name, Type) \ - OS << " " << #Name << ": " << Name << "\n"; + OS << " " << #Name << ": " << Name << "\n"; #include "llvm/ProfileData/MIBEntryDef.inc" #undef MIBEntryDef } @@ -133,6 +133,7 @@ #undef MIBEntryDef }; +// Holds the memprof profile information for a function. struct MemProfRecord { // Describes a call frame for a dynamic allocation context. The contents of // the frame are populated by symbolizing the stack depot call frame from the @@ -193,64 +194,152 @@ return sizeof(Frame::Function) + sizeof(Frame::LineOffset) + sizeof(Frame::Column) + sizeof(Frame::IsInlineFrame); } + + // Print the frame information in YAML format. + void printYAML(raw_ostream &OS) const { + OS << " -\n" + << " Function: " << Function << "\n" + << " LineOffset: " << LineOffset << "\n" + << " Column: " << Column << "\n" + << " Inline: " << IsInlineFrame << "\n"; + } }; - // The dynamic calling context for the allocation. - llvm::SmallVector CallStack; - // The statistics obtained from the runtime for the allocation. - PortableMemInfoBlock Info; + struct AllocationInfo { + // The dynamic calling context for the allocation. + llvm::SmallVector CallStack; + // The statistics obtained from the runtime for the allocation. + PortableMemInfoBlock Info; + + AllocationInfo() = default; + AllocationInfo(ArrayRef CS, const MemInfoBlock &MB) + : CallStack(CS.begin(), CS.end()), Info(MB) {} + + void printYAML(raw_ostream &OS) const { + OS << " -\n"; + OS << " Callstack:\n"; + // TODO: Print out the frame on one line with to make it easier for deep + // callstacks once we have a test to check valid YAML is generated. + for (const auto &Frame : CallStack) + Frame.printYAML(OS); + Info.printYAML(OS); + } + + size_t serializedSize() const { + return sizeof(uint64_t) + // The number of frames to serialize. + Frame::serializedSize() * + CallStack.size() + // The contents of the frames. + PortableMemInfoBlock::serializedSize(); // The size of the payload. 
+ } + + bool operator==(const AllocationInfo &Other) const { + if (Other.Info != Info) + return false; + + if (Other.CallStack.size() != CallStack.size()) + return false; + + for (size_t J = 0; J < Other.CallStack.size(); J++) { + if (Other.CallStack[J] != CallStack[J]) + return false; + } + return true; + } + + bool operator!=(const AllocationInfo &Other) const { + return !operator==(Other); + } + }; + + // Memory allocation sites in this function for which we have memory profiling + // data. + llvm::SmallVector AllocSites; + // Holds call sites in this function which are part of some memory allocation + // context. We store this as a list of locations, each with its list of + // inline locations in bottom-up order i.e. from leaf to root. The inline + // location list may include additional entries, users should pick the last + // entry in the list with the same function GUID. + llvm::SmallVector> CallSites; void clear() { - CallStack.clear(); - Info.clear(); + AllocSites.clear(); + CallSites.clear(); + } + + void merge(const MemProfRecord &Other) { + // TODO: Filter out duplicates which may occur if multiple memprof profiles + // are merged together using llvm-profdata. + AllocSites.append(Other.AllocSites); + CallSites.append(Other.CallSites); } size_t serializedSize() const { - return sizeof(uint64_t) + // The number of frames to serialize. - Frame::serializedSize() * - CallStack.size() + // The contents of the frames. - PortableMemInfoBlock::serializedSize(); // The size of the payload. + size_t Result = sizeof(GlobalValue::GUID); + for (const AllocationInfo &N : AllocSites) + Result += N.serializedSize(); + + // The number of callsites we have information for. + Result += sizeof(uint64_t); + for (const auto &Frames : CallSites) { + // The number of frames to serialize. + Result += sizeof(uint64_t); + for (const Frame &F : Frames) + Result += F.serializedSize(); + } + return Result; } // Prints out the contents of the memprof record in YAML. void print(llvm::raw_ostream &OS) const { - OS << " Callstack:\n"; - // TODO: Print out the frame on one line with to make it easier for deep - // callstacks once we have a test to check valid YAML is generated. - for (const auto &Frame : CallStack) { - OS << " -\n" - << " Function: " << Frame.Function << "\n" - << " LineOffset: " << Frame.LineOffset << "\n" - << " Column: " << Frame.Column << "\n" - << " Inline: " << Frame.IsInlineFrame << "\n"; + if (!AllocSites.empty()) { + OS << " AllocSites:\n"; + for (const AllocationInfo &N : AllocSites) + N.printYAML(OS); } - Info.printYAML(OS); + if (!CallSites.empty()) { + OS << " CallSites:\n"; + for (const auto &Frames : CallSites) { + for (const auto &F : Frames) { + OS << " -\n"; + F.printYAML(OS); + } + } + } } bool operator==(const MemProfRecord &Other) const { - if (Other.Info != Info) + if (Other.AllocSites.size() != AllocSites.size()) return false; - if (Other.CallStack.size() != CallStack.size()) + if (Other.CallSites.size() != CallSites.size()) return false; - for (size_t I = 0; I < Other.CallStack.size(); I++) { - if (Other.CallStack[I] != CallStack[I]) + for (size_t I = 0; I < AllocSites.size(); I++) { + if (AllocSites[I] != Other.AllocSites[I]) + return false; + } + + for (size_t I = 0; I < CallSites.size(); I++) { + if (CallSites[I] != Other.CallSites[I]) return false; } return true; } -}; -// Serializes the memprof records in \p Records to the ostream \p OS based on -// the schema provided in \p Schema. 
-void serializeRecords(const ArrayRef Records, - const MemProfSchema &Schema, raw_ostream &OS); + // Serializes the memprof records in \p Records to the ostream \p OS based on + // the schema provided in \p Schema. + void serialize(const MemProfSchema &Schema, raw_ostream &OS); -// Deserializes memprof records from the Buffer -SmallVector deserializeRecords(const MemProfSchema &Schema, - const unsigned char *Buffer); + // Deserializes memprof records from the Buffer. + static MemProfRecord deserialize(const MemProfSchema &Schema, + const unsigned char *Buffer); + + // Returns the GUID for the function name after canonicalization. For memprof, + // we remove any .llvm suffix added by LTO. MemProfRecords are mapped to + // functions using this GUID. + static GlobalValue::GUID getGUID(const StringRef FunctionName); +}; // Reads a memprof schema from a buffer. All entries in the buffer are // interpreted as uint64_t. The first entry in the buffer denotes the number of @@ -259,14 +348,11 @@ // byte past the schema contents. Expected readMemProfSchema(const unsigned char *&Buffer); -using FunctionMemProfMap = - DenseMap>; - /// Trait for lookups into the on-disk hash table for memprof format in the /// indexed profile. class MemProfRecordLookupTrait { public: - using data_type = ArrayRef; + using data_type = const MemProfRecord &; using internal_key_type = uint64_t; using external_key_type = uint64_t; using hash_value_type = uint64_t; @@ -297,15 +383,15 @@ data_type ReadData(uint64_t K, const unsigned char *D, offset_type /*Unused*/) { - Records = deserializeRecords(Schema, D); - return Records; + Record = MemProfRecord::deserialize(Schema, D); + return Record; } private: // Holds the memprof schema used to deserialize records. MemProfSchema Schema; // Holds the records from one function deserialized from the indexed format. 
- llvm::SmallVector Records; + MemProfRecord Record; }; class MemProfRecordWriterTrait { @@ -313,8 +399,8 @@ using key_type = uint64_t; using key_type_ref = uint64_t; - using data_type = ArrayRef; - using data_type_ref = ArrayRef; + using data_type = MemProfRecord; + using data_type_ref = MemProfRecord &; using hash_value_type = uint64_t; using offset_type = uint64_t; @@ -333,17 +419,9 @@ using namespace support; endian::Writer LE(Out, little); - offset_type N = sizeof(K); LE.write(N); - - offset_type M = 0; - - M += sizeof(uint64_t); - for (const auto &Record : V) { - M += Record.serializedSize(); - } - + offset_type M = V.serializedSize(); LE.write(M); return std::make_pair(N, M); } @@ -357,7 +435,7 @@ void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V, offset_type /*Unused*/) { assert(Schema != nullptr && "MemProf schema is not initialized!"); - serializeRecords(V, *Schema, Out); + V.serialize(*Schema, Out); } }; diff --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h --- a/llvm/include/llvm/ProfileData/RawMemProfReader.h +++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h @@ -14,9 +14,11 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" #include "llvm/ProfileData/InstrProfReader.h" @@ -57,15 +59,16 @@ static Expected> create(const Twine &Path, const StringRef ProfiledBinary); - Error readNextRecord(MemProfRecord &Record); - - using Iterator = InstrProfIterator; + using GuidMemProfRecordPair = std::pair; + using Iterator = InstrProfIterator; Iterator end() { return Iterator(); } Iterator begin() { - Iter = ProfileData.begin(); + Iter = FunctionProfileData.begin(); return Iterator(this); } + Error readNextRecord(GuidMemProfRecordPair &GuidRecord); + // The RawMemProfReader only holds memory profile information. InstrProfKind getProfileKind() const { return InstrProfKind::MemProf; } @@ -75,7 +78,7 @@ llvm::MapVector &Prof, CallStackMap &SM) : Symbolizer(std::move(Sym)), SegmentInfo(Seg.begin(), Seg.end()), - ProfileData(Prof), StackMap(SM) { + CallstackProfileData(Prof), StackMap(SM) { // We don't call initialize here since there is no raw profile to read. The // test should pass in the raw profile as structured data. @@ -83,6 +86,8 @@ // initialized properly. if (Error E = symbolizeAndFilterStackFrames()) report_fatal_error(std::move(E)); + if (Error E = mapRawProfileToRecords()) + report_fatal_error(std::move(E)); } private: @@ -96,10 +101,12 @@ // symbolize or those that belong to the runtime. For profile entries where // the entire callstack is pruned, we drop the entry from the profile. Error symbolizeAndFilterStackFrames(); + // Construct memprof records for each function and store it in the + // `FunctionProfileData` map. A function may have allocation profile data or + // callsite data or both. + Error mapRawProfileToRecords(); object::SectionedAddress getModuleOffset(uint64_t VirtualAddress); - Error fillRecord(const uint64_t Id, const MemInfoBlock &MIB, - MemProfRecord &Record); // Prints aggregate counts for each raw profile parsed from the DataBuffer in // YAML format. 
void printSummaries(raw_ostream &OS) const; @@ -112,15 +119,15 @@ llvm::SmallVector SegmentInfo; // A map from callstack id (same as key in CallStackMap below) to the heap // information recorded for that allocation context. - llvm::MapVector ProfileData; + llvm::MapVector CallstackProfileData; CallStackMap StackMap; // Cached symbolization from PC to Frame. llvm::DenseMap> SymbolizedFrame; - // Iterator to read from the ProfileData MapVector. - llvm::MapVector::iterator Iter = ProfileData.end(); + llvm::MapVector FunctionProfileData; + llvm::MapVector::iterator Iter; }; } // namespace memprof diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -253,28 +253,14 @@ Dest.sortValueData(); } -void InstrProfWriter::addRecord(const memprof::MemProfRecord &MR, +void InstrProfWriter::addRecord(const Function::GUID Id, + const memprof::MemProfRecord &Record, function_ref Warn) { - // Use 0 as a sentinel value since its highly unlikely that the lower 64-bits - // of a 128 bit md5 hash will be all zeros. - // TODO: Move this Key frame detection to the contructor to avoid having to - // scan all the callstacks again when adding a new record. - uint64_t Key = 0; - for (auto Iter = MR.CallStack.rbegin(), End = MR.CallStack.rend(); - Iter != End; Iter++) { - if (!Iter->IsInlineFrame) { - Key = Iter->Function; - break; - } - } - - if (Key == 0) { - Warn(make_error( - instrprof_error::invalid_prof, - "could not determine leaf function for memprof record.")); + auto Result = MemProfData.insert({Id, Record}); + if (!Result.second) { + memprof::MemProfRecord &Existing = Result.first->second; + Existing.merge(Record); } - - MemProfData[Key].push_back(MR); } void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW, @@ -283,9 +269,9 @@ for (auto &Func : I.getValue()) addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn); - for (auto &I : IPW.MemProfData) - for (const auto &MR : I.second) - addRecord(MR, Warn); + for (auto &I : IPW.MemProfData) { + addRecord(I.first, I.second, Warn); + } } bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) { @@ -415,8 +401,8 @@ MemProfWriter->Schema = &Schema; OnDiskChainedHashTableGenerator MemProfGenerator; - for (const auto &I : MemProfData) { - // Insert the key (func hash) and value (vector of memprof records). + for (auto &I : MemProfData) { + // Insert the key (func hash) and value (memprof record). 
MemProfGenerator.insert(I.first, I.second); } diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp --- a/llvm/lib/ProfileData/MemProf.cpp +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -1,4 +1,6 @@ #include "llvm/ProfileData/MemProf.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Function.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" @@ -6,43 +8,76 @@ namespace llvm { namespace memprof { -void serializeRecords(const ArrayRef Records, - const MemProfSchema &Schema, raw_ostream &OS) { +void MemProfRecord::serialize(const MemProfSchema &Schema, raw_ostream &OS) { using namespace support; endian::Writer LE(OS, little); - LE.write(Records.size()); - for (const MemProfRecord &MR : Records) { - LE.write(MR.CallStack.size()); - for (const MemProfRecord::Frame &F : MR.CallStack) { + LE.write(AllocSites.size()); + for (const AllocationInfo &N : AllocSites) { + LE.write(N.CallStack.size()); + for (const Frame &F : N.CallStack) + F.serialize(OS); + N.Info.serialize(Schema, OS); + } + + // Related contexts. + LE.write(CallSites.size()); + for (const auto &Frames : CallSites) { + LE.write(Frames.size()); + for (const Frame &F : Frames) F.serialize(OS); - } - MR.Info.serialize(Schema, OS); } } -SmallVector deserializeRecords(const MemProfSchema &Schema, - const unsigned char *Ptr) { +MemProfRecord MemProfRecord::deserialize(const MemProfSchema &Schema, + const unsigned char *Ptr) { using namespace support; - SmallVector Records; - const uint64_t NumRecords = - endian::readNext(Ptr); - for (uint64_t I = 0; I < NumRecords; I++) { - MemProfRecord MR; + MemProfRecord Record; + + // Read the meminfo nodes. + const uint64_t NumNodes = endian::readNext(Ptr); + for (uint64_t I = 0; I < NumNodes; I++) { + MemProfRecord::AllocationInfo Node; const uint64_t NumFrames = endian::readNext(Ptr); for (uint64_t J = 0; J < NumFrames; J++) { const auto F = MemProfRecord::Frame::deserialize(Ptr); Ptr += MemProfRecord::Frame::serializedSize(); - MR.CallStack.push_back(F); + Node.CallStack.push_back(F); } - MR.Info.deserialize(Schema, Ptr); + Node.Info.deserialize(Schema, Ptr); Ptr += PortableMemInfoBlock::serializedSize(); - Records.push_back(MR); + Record.AllocSites.push_back(Node); } - return Records; + + // Read the callsite information. + const uint64_t NumCtxs = endian::readNext(Ptr); + for (uint64_t J = 0; J < NumCtxs; J++) { + const uint64_t NumFrames = + endian::readNext(Ptr); + llvm::SmallVector Frames; + for (uint64_t K = 0; K < NumFrames; K++) { + const auto F = MemProfRecord::Frame::deserialize(Ptr); + Ptr += MemProfRecord::Frame::serializedSize(); + Frames.push_back(F); + } + Record.CallSites.push_back(Frames); + } + + return Record; +} + +GlobalValue::GUID MemProfRecord::getGUID(const StringRef FunctionName) { + const auto Pos = FunctionName.find(".llvm."); + + // We use the function guid which we expect to be a uint64_t. At + // this time, it is the lower 64 bits of the md5 of the function + // name. Any suffix with .llvm. is trimmed since these are added by + // thinLTO global promotion. At the time the profile is consumed, + // these suffixes will not be present. 
+ return Function::getGUID(FunctionName.take_front(Pos)); } Expected readMemProfSchema(const unsigned char *&Buffer) { diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp --- a/llvm/lib/ProfileData/RawMemProfReader.cpp +++ b/llvm/lib/ProfileData/RawMemProfReader.cpp @@ -14,13 +14,13 @@ #include #include +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h" -#include "llvm/IR/Function.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/ObjectFile.h" @@ -163,11 +163,6 @@ return false; } -StringRef trimSuffix(const StringRef Name) { - const auto Pos = Name.find(".llvm."); - return Name.take_front(Pos); -} - Error report(Error E, const StringRef Context) { return joinErrors(createStringError(inconvertibleErrorCode(), Context), std::move(E)); @@ -233,9 +228,10 @@ printSummaries(OS); // Print out the merged contents of the profiles. OS << " Records:\n"; - for (const auto &Record : *this) { + for (const auto &Entry : *this) { OS << " -\n"; - Record.print(OS); + OS << " FunctionGUID: " << Entry.first << "\n"; + Entry.second.print(OS); } } @@ -288,7 +284,90 @@ if (Error E = readRawProfile()) return E; - return symbolizeAndFilterStackFrames(); + if (Error E = symbolizeAndFilterStackFrames()) + return E; + + return mapRawProfileToRecords(); +} + +Error RawMemProfReader::mapRawProfileToRecords() { + // Hold a mapping from function to each callsite location we encounter within + // it that is part of some dynamic allocation context. The location is stored + // as a pointer to a symbolized list of inline frames. + using LocationPtr = const llvm::SmallVector *; + llvm::DenseMap> + PerFunctionCallSites; + + // Convert the raw profile callstack data into memprof records. While doing so + // keep track of related contexts so that we can fill these in later. + for (const auto &Entry : CallstackProfileData) { + const uint64_t StackId = Entry.first; + + auto It = StackMap.find(StackId); + if (It == StackMap.end()) + return make_error( + instrprof_error::malformed, + "memprof callstack record does not contain id: " + Twine(StackId)); + + // Construct the symbolized callstack. + llvm::SmallVector Callstack; + Callstack.reserve(It->getSecond().size()); + + llvm::ArrayRef Addresses = It->getSecond(); + for (size_t I = 0; I < Addresses.size(); I++) { + const uint64_t Address = Addresses[I]; + assert(SymbolizedFrame.count(Address) > 0 && + "Address not found in SymbolizedFrame map"); + const SmallVector &Frames = + SymbolizedFrame[Address]; + + assert(!Frames.back().IsInlineFrame && + "The last frame should not be inlined"); + + // Record the callsites for each function. Skip the first frame of the + // first address since it is the allocation site itself that is recorded + // as an alloc site. + for (size_t J = 0; J < Frames.size(); J++) { + if (I == 0 && J == 0) + continue; + // We attach the entire bottom-up frame here for the callsite even + // though we only need the frames up to and including the frame for + // Frames[J].Function. This will enable better deduplication for + // compression in the future. + PerFunctionCallSites[Frames[J].Function].insert(&Frames); + } + + // Add all the frames to the current allocation callstack. 
+ Callstack.append(Frames.begin(), Frames.end()); + } + + // We attach the memprof record to each function bottom-up including the + // first non-inline frame. + for (size_t I = 0; /*Break out using the condition below*/; I++) { + auto Result = + FunctionProfileData.insert({Callstack[I].Function, MemProfRecord()}); + MemProfRecord &Record = Result.first->second; + Record.AllocSites.emplace_back(Callstack, Entry.second); + + if (!Callstack[I].IsInlineFrame) + break; + } + } + + // Fill in the related callsites per function. + for (auto I = PerFunctionCallSites.begin(), E = PerFunctionCallSites.end(); + I != E; I++) { + const GlobalValue::GUID Id = I->first; + // Some functions may have only callsite data and no allocation data. Here + // we insert a new entry for callsite data if we need to. + auto Result = FunctionProfileData.insert({Id, MemProfRecord()}); + MemProfRecord &Record = Result.first->second; + for (LocationPtr Loc : I->getSecond()) { + Record.CallSites.push_back(*Loc); + } + } + + return Error::success(); } Error RawMemProfReader::symbolizeAndFilterStackFrames() { @@ -331,15 +410,10 @@ LLVM_DEBUG( // Print out the name to guid mapping for debugging. llvm::dbgs() << "FunctionName: " << Frame.FunctionName << " GUID: " - << Function::getGUID(trimSuffix(Frame.FunctionName)) + << MemProfRecord::getGUID(Frame.FunctionName) << "\n";); SymbolizedFrame[VAddr].emplace_back( - // We use the function guid which we expect to be a uint64_t. At - // this time, it is the lower 64 bits of the md5 of the function - // name. Any suffix with .llvm. is trimmed since these are added by - // thinLTO global promotion. At the time the profile is consumed, - // these suffixes will not be present. - Function::getGUID(trimSuffix(Frame.FunctionName)), + MemProfRecord::getGUID(Frame.FunctionName), Frame.Line - Frame.StartLine, Frame.Column, // Only the last entry is not an inlined location. I != NumFrames - 1); @@ -359,7 +433,7 @@ // Drop the entries where the callstack is empty. for (const uint64_t Id : EntriesToErase) { StackMap.erase(Id); - ProfileData.erase(Id); + CallstackProfileData.erase(Id); } if (StackMap.empty()) @@ -394,10 +468,10 @@ // raw profiles in the same binary file are from the same process so the // stackdepot ids are the same. 
for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) { - if (ProfileData.count(Value.first)) { - ProfileData[Value.first].Merge(Value.second); + if (CallstackProfileData.count(Value.first)) { + CallstackProfileData[Value.first].Merge(Value.second); } else { - ProfileData[Value.first] = Value.second; + CallstackProfileData[Value.first] = Value.second; } } @@ -438,29 +512,14 @@ return object::SectionedAddress{VirtualAddress}; } -Error RawMemProfReader::fillRecord(const uint64_t Id, const MemInfoBlock &MIB, - MemProfRecord &Record) { - auto &CallStack = StackMap[Id]; - for (const uint64_t Address : CallStack) { - assert(SymbolizedFrame.count(Address) && - "Address not found in symbolized frame cache."); - Record.CallStack.append(SymbolizedFrame[Address]); - } - Record.Info = PortableMemInfoBlock(MIB); - return Error::success(); -} - -Error RawMemProfReader::readNextRecord(MemProfRecord &Record) { - if (ProfileData.empty()) +Error RawMemProfReader::readNextRecord(GuidMemProfRecordPair &GuidRecord) { + if (FunctionProfileData.empty()) return make_error(instrprof_error::empty_raw_profile); - if (Iter == ProfileData.end()) + if (Iter == FunctionProfileData.end()) return make_error(instrprof_error::eof); - Record.clear(); - if (Error E = fillRecord(Iter->first, Iter->second, Record)) { - return E; - } + GuidRecord = {Iter->first, Iter->second}; Iter++; return Error::success(); } diff --git a/llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe new file mode 100755 index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@ basic.memprofraw ``` @@ -46,56 +46,59 @@ CHECK-NEXT: NumStackOffsets: 3 CHECK-NEXT: Records: CHECK-NEXT: - -CHECK-NEXT: Callstack: +CHECK-NEXT: FunctionGUID: {{[0-9]+}} +CHECK-NEXT: AllocSites: CHECK-NEXT: - -CHECK-NEXT: Function: {{[0-9]+}} -CHECK-NEXT: LineOffset: 1 -CHECK-NEXT: Column: 21 -CHECK-NEXT: Inline: 0 -CHECK-NEXT: MemInfoBlock: -CHECK-NEXT: AllocCount: 1 -CHECK-NEXT: TotalAccessCount: 2 -CHECK-NEXT: MinAccessCount: 2 -CHECK-NEXT: MaxAccessCount: 2 -CHECK-NEXT: TotalSize: 10 -CHECK-NEXT: MinSize: 10 -CHECK-NEXT: MaxSize: 10 -CHECK-NEXT: AllocTimestamp: 986 -CHECK-NEXT: DeallocTimestamp: 986 -CHECK-NEXT: TotalLifetime: 0 -CHECK-NEXT: MinLifetime: 0 -CHECK-NEXT: MaxLifetime: 0 -CHECK-NEXT: AllocCpuId: 56 -CHECK-NEXT: DeallocCpuId: 56 -CHECK-NEXT: NumMigratedCpu: 0 -CHECK-NEXT: NumLifetimeOverlaps: 0 -CHECK-NEXT: NumSameAllocCpu: 0 -CHECK-NEXT: NumSameDeallocCpu: 0 -CHECK-NEXT: DataTypeId: {{[0-9]+}} -CHECK-NEXT: - -CHECK-NEXT: Callstack: +CHECK-NEXT: Callstack: +CHECK-NEXT: - +CHECK-NEXT: Function: {{[0-9]+}} +CHECK-NEXT: LineOffset: 1 +CHECK-NEXT: Column: 21 +CHECK-NEXT: Inline: 0 +CHECK-NEXT: MemInfoBlock: +CHECK-NEXT: AllocCount: 1 +CHECK-NEXT: TotalAccessCount: 2 +CHECK-NEXT: MinAccessCount: 2 +CHECK-NEXT: MaxAccessCount: 2 +CHECK-NEXT: TotalSize: 10 +CHECK-NEXT: MinSize: 10 +CHECK-NEXT: MaxSize: 10 +CHECK-NEXT: AllocTimestamp: 986 +CHECK-NEXT: DeallocTimestamp: 986 +CHECK-NEXT: TotalLifetime: 0 +CHECK-NEXT: MinLifetime: 0 +CHECK-NEXT: MaxLifetime: 0 +CHECK-NEXT: AllocCpuId: 56 +CHECK-NEXT: DeallocCpuId: 56 +CHECK-NEXT: NumMigratedCpu: 0 +CHECK-NEXT: NumLifetimeOverlaps: 0 +CHECK-NEXT: NumSameAllocCpu: 0 +CHECK-NEXT: NumSameDeallocCpu: 0 +CHECK-NEXT: DataTypeId: {{[0-9]+}} CHECK-NEXT: - -CHECK-NEXT: Function: {{[0-9]+}} -CHECK-NEXT: LineOffset: 5 -CHECK-NEXT: Column: 15 -CHECK-NEXT: Inline: 0 -CHECK-NEXT: MemInfoBlock: 
-CHECK-NEXT: AllocCount: 1 -CHECK-NEXT: TotalAccessCount: 2 -CHECK-NEXT: MinAccessCount: 2 -CHECK-NEXT: MaxAccessCount: 2 -CHECK-NEXT: TotalSize: 10 -CHECK-NEXT: MinSize: 10 -CHECK-NEXT: MaxSize: 10 -CHECK-NEXT: AllocTimestamp: 987 -CHECK-NEXT: DeallocTimestamp: 987 -CHECK-NEXT: TotalLifetime: 0 -CHECK-NEXT: MinLifetime: 0 -CHECK-NEXT: MaxLifetime: 0 -CHECK-NEXT: AllocCpuId: 56 -CHECK-NEXT: DeallocCpuId: 56 -CHECK-NEXT: NumMigratedCpu: 0 -CHECK-NEXT: NumLifetimeOverlaps: 0 -CHECK-NEXT: NumSameAllocCpu: 0 -CHECK-NEXT: NumSameDeallocCpu: 0 -CHECK-NEXT: DataTypeId: {{[0-9]+}} +CHECK-NEXT: Callstack: +CHECK-NEXT: - +CHECK-NEXT: Function: {{[0-9]+}} +CHECK-NEXT: LineOffset: 5 +CHECK-NEXT: Column: 15 +CHECK-NEXT: Inline: 0 +CHECK-NEXT: MemInfoBlock: +CHECK-NEXT: AllocCount: 1 +CHECK-NEXT: TotalAccessCount: 2 +CHECK-NEXT: MinAccessCount: 2 +CHECK-NEXT: MaxAccessCount: 2 +CHECK-NEXT: TotalSize: 10 +CHECK-NEXT: MinSize: 10 +CHECK-NEXT: MaxSize: 10 +CHECK-NEXT: AllocTimestamp: 987 +CHECK-NEXT: DeallocTimestamp: 987 +CHECK-NEXT: TotalLifetime: 0 +CHECK-NEXT: MinLifetime: 0 +CHECK-NEXT: MaxLifetime: 0 +CHECK-NEXT: AllocCpuId: 56 +CHECK-NEXT: DeallocCpuId: 56 +CHECK-NEXT: NumMigratedCpu: 0 +CHECK-NEXT: NumLifetimeOverlaps: 0 +CHECK-NEXT: NumSameAllocCpu: 0 +CHECK-NEXT: NumSameDeallocCpu: 0 +CHECK-NEXT: DataTypeId: {{[0-9]+}} diff --git a/llvm/test/tools/llvm-profdata/memprof-inline.test b/llvm/test/tools/llvm-profdata/memprof-inline.test --- a/llvm/test/tools/llvm-profdata/memprof-inline.test +++ b/llvm/test/tools/llvm-profdata/memprof-inline.test @@ -50,7 +50,7 @@ [..omit output here which is checked below..] ``` -RUN: llvm-profdata show --memory %p/Inputs/inline.memprofraw --profiled-binary %p/Inputs/memprof-inline.exe +RUN: llvm-profdata show --memory %p/Inputs/inline.memprofraw --profiled-binary %p/Inputs/inline.memprofexe | FileCheck %s CHECK: MemprofProfile: CHECK-NEXT: - @@ -62,45 +62,123 @@ CHECK-NEXT: NumStackOffsets: 2 CHECK-NEXT: Records: CHECK-NEXT: - -CHECK-NEXT: Callstack: +CHECK-NEXT: FunctionGUID: 15505678318020221912 +CHECK-NEXT: AllocSites: CHECK-NEXT: - -CHECK-NEXT: Function: 15505678318020221912 -CHECK-NEXT: LineOffset: 1 -CHECK-NEXT: Column: 15 -CHECK-NEXT: Inline: 0 +CHECK-NEXT: Callstack: +CHECK-NEXT: - +CHECK-NEXT: Function: 15505678318020221912 +CHECK-NEXT: LineOffset: 1 +CHECK-NEXT: Column: 15 +CHECK-NEXT: Inline: 1 +CHECK-NEXT: - +CHECK-NEXT: Function: 6699318081062747564 +CHECK-NEXT: LineOffset: 0 +CHECK-NEXT: Column: 18 +CHECK-NEXT: Inline: 0 +CHECK-NEXT: - +CHECK-NEXT: Function: 16434608426314478903 +CHECK-NEXT: LineOffset: 0 +CHECK-NEXT: Column: 19 +CHECK-NEXT: Inline: 0 +CHECK-NEXT: - +CHECK-NEXT: Function: 15822663052811949562 +CHECK-NEXT: LineOffset: 1 +CHECK-NEXT: Column: 3 +CHECK-NEXT: Inline: 0 +CHECK-NEXT: MemInfoBlock: +CHECK-NEXT: AllocCount: 1 +CHECK-NEXT: TotalAccessCount: 1 +CHECK-NEXT: MinAccessCount: 1 +CHECK-NEXT: MaxAccessCount: 1 +CHECK-NEXT: TotalSize: 1 +CHECK-NEXT: MinSize: 1 +CHECK-NEXT: MaxSize: 1 +CHECK-NEXT: AllocTimestamp: 894 +CHECK-NEXT: DeallocTimestamp: 894 +CHECK-NEXT: TotalLifetime: 0 +CHECK-NEXT: MinLifetime: 0 +CHECK-NEXT: MaxLifetime: 0 +CHECK-NEXT: AllocCpuId: 23 +CHECK-NEXT: DeallocCpuId: 23 +CHECK-NEXT: NumMigratedCpu: 0 +CHECK-NEXT: NumLifetimeOverlaps: 0 +CHECK-NEXT: NumSameAllocCpu: 0 +CHECK-NEXT: NumSameDeallocCpu: 0 +CHECK-NEXT: DataTypeId: {{[0-9]+}} +CHECK-NEXT: - +CHECK-NEXT: FunctionGUID: 6699318081062747564 +CHECK-NEXT: AllocSites: CHECK-NEXT: - -CHECK-NEXT: Function: 6699318081062747564 -CHECK-NEXT: LineOffset: 0 
-CHECK-NEXT: Column: 18 -CHECK-NEXT: Inline: 1 +CHECK-NEXT: Callstack: +CHECK-NEXT: - +CHECK-NEXT: Function: 15505678318020221912 +CHECK-NEXT: LineOffset: 1 +CHECK-NEXT: Column: 15 +CHECK-NEXT: Inline: 1 +CHECK-NEXT: - +CHECK-NEXT: Function: 6699318081062747564 +CHECK-NEXT: LineOffset: 0 +CHECK-NEXT: Column: 18 +CHECK-NEXT: Inline: 0 +CHECK-NEXT: - +CHECK-NEXT: Function: 16434608426314478903 +CHECK-NEXT: LineOffset: 0 +CHECK-NEXT: Column: 19 +CHECK-NEXT: Inline: 0 +CHECK-NEXT: - +CHECK-NEXT: Function: 15822663052811949562 +CHECK-NEXT: LineOffset: 1 +CHECK-NEXT: Column: 3 +CHECK-NEXT: Inline: 0 +CHECK-NEXT: MemInfoBlock: +CHECK-NEXT: AllocCount: 1 +CHECK-NEXT: TotalAccessCount: 1 +CHECK-NEXT: MinAccessCount: 1 +CHECK-NEXT: MaxAccessCount: 1 +CHECK-NEXT: TotalSize: 1 +CHECK-NEXT: MinSize: 1 +CHECK-NEXT: MaxSize: 1 +CHECK-NEXT: AllocTimestamp: 894 +CHECK-NEXT: DeallocTimestamp: 894 +CHECK-NEXT: TotalLifetime: 0 +CHECK-NEXT: MinLifetime: 0 +CHECK-NEXT: MaxLifetime: 0 +CHECK-NEXT: AllocCpuId: 23 +CHECK-NEXT: DeallocCpuId: 23 +CHECK-NEXT: NumMigratedCpu: 0 +CHECK-NEXT: NumLifetimeOverlaps: 0 +CHECK-NEXT: NumSameAllocCpu: 0 +CHECK-NEXT: NumSameDeallocCpu: 0 +CHECK-NEXT: DataTypeId: {{[0-9]+}} +CHECK-NEXT: CallSites: CHECK-NEXT: - -CHECK-NEXT: Function: 16434608426314478903 -CHECK-NEXT: LineOffset: 0 -CHECK-NEXT: Column: 19 -CHECK-NEXT: Inline: 0 +CHECK-NEXT: - +CHECK-NEXT: Function: 15505678318020221912 +CHECK-NEXT: LineOffset: 1 +CHECK-NEXT: Column: 15 +CHECK-NEXT: Inline: 1 CHECK-NEXT: - -CHECK-NEXT: Function: 15822663052811949562 -CHECK-NEXT: LineOffset: 1 -CHECK-NEXT: Column: 3 -CHECK-NEXT: Inline: 0 -CHECK-NEXT: MemInfoBlock: -CHECK-NEXT: AllocCount: 1 -CHECK-NEXT: TotalAccessCount: 1 -CHECK-NEXT: MinAccessCount: 1 -CHECK-NEXT: MaxAccessCount: 1 -CHECK-NEXT: TotalSize: 1 -CHECK-NEXT: MinSize: 1 -CHECK-NEXT: MaxSize: 1 -CHECK-NEXT: AllocTimestamp: 894 -CHECK-NEXT: DeallocTimestamp: 894 -CHECK-NEXT: TotalLifetime: 0 -CHECK-NEXT: MinLifetime: 0 -CHECK-NEXT: MaxLifetime: 0 -CHECK-NEXT: AllocCpuId: 23 -CHECK-NEXT: DeallocCpuId: 23 -CHECK-NEXT: NumMigratedCpu: 0 -CHECK-NEXT: NumLifetimeOverlaps: 0 -CHECK-NEXT: NumSameAllocCpu: 0 -CHECK-NEXT: NumSameDeallocCpu: 0 -CHECK-NEXT: DataTypeId: {{[0-9]+}} - +CHECK-NEXT: - +CHECK-NEXT: Function: 6699318081062747564 +CHECK-NEXT: LineOffset: 0 +CHECK-NEXT: Column: 18 +CHECK-NEXT: Inline: 0 +CHECK-NEXT: - +CHECK-NEXT: FunctionGUID: 15822663052811949562 +CHECK-NEXT: CallSites: +CHECK-NEXT: - +CHECK-NEXT: - +CHECK-NEXT: Function: 15822663052811949562 +CHECK-NEXT: LineOffset: 1 +CHECK-NEXT: Column: 3 +CHECK-NEXT: Inline: 0 +CHECK-NEXT: - +CHECK-NEXT: FunctionGUID: 16434608426314478903 +CHECK-NEXT: CallSites: +CHECK-NEXT: - +CHECK-NEXT: - +CHECK-NEXT: Function: 16434608426314478903 +CHECK-NEXT: LineOffset: 0 +CHECK-NEXT: Column: 19 +CHECK-NEXT: Inline: 0 diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -267,8 +267,8 @@ } // Add the records into the writer context. 
- for (const memprof::MemProfRecord &MR : *Reader) { - WC->Writer.addRecord(MR, [&](Error E) { + for (auto I = Reader->begin(), E = Reader->end(); I != E; ++I) { + WC->Writer.addRecord(/*Id=*/I->first, /*Record=*/I->second, [&](Error E) { instrprof_error IPE = InstrProfError::take(std::move(E)); WC->Errors.emplace_back(make_error(IPE), Filename); }); diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -13,6 +13,7 @@ #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/InstrProfWriter.h" #include "llvm/ProfileData/MemProf.h" +#include "llvm/ProfileData/MemProfData.inc" #include "llvm/Support/Compression.h" #include "llvm/Testing/Support/Error.h" #include "llvm/Testing/Support/SupportHelpers.h" @@ -222,18 +223,41 @@ ASSERT_EQ(0U, R->Counts[1]); } +using ::llvm::memprof::MemInfoBlock; +using ::llvm::memprof::MemProfRecord; +MemProfRecord +makeRecord(std::initializer_list> + AllocFrames, + std::initializer_list> + CallSiteFrames, + const MemInfoBlock &Block = MemInfoBlock()) { + llvm::memprof::MemProfRecord MR; + for (const auto &Frames : AllocFrames) + MR.AllocSites.emplace_back(Frames, Block); + for (const auto &Frames : CallSiteFrames) + MR.CallSites.push_back(Frames); + return MR; +} + TEST_F(InstrProfTest, test_memprof) { ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf), Succeeded()); - llvm::memprof::MemProfRecord MR; - MR.CallStack.push_back({0x123, 1, 2, false}); - MR.CallStack.push_back({0x345, 3, 4, true}); - Writer.addRecord(MR, Err); + + const MemProfRecord MR = makeRecord( + /*AllocFrames=*/ + { + {{0x123, 1, 2, false}, {0x345, 3, 4, true}}, + {{0x125, 5, 6, false}, {0x567, 7, 8, true}}, + }, + /*CallSiteFrames=*/{ + {{0x124, 5, 6, false}, {0x789, 8, 9, true}}, + }); + Writer.addRecord(/*Id=*/0x9999, MR, Err); auto Profile = Writer.writeBuffer(); readProfile(std::move(Profile)); - auto RecordsOr = Reader->getMemProfRecord(0x123); + auto RecordsOr = Reader->getMemProfRecord(0x9999); ASSERT_THAT_ERROR(RecordsOr.takeError(), Succeeded()); const auto Records = RecordsOr.get(); ASSERT_EQ(Records.size(), 1U); @@ -247,10 +271,16 @@ ASSERT_THAT_ERROR(Writer2.mergeProfileKind(InstrProfKind::MemProf), Succeeded()); - llvm::memprof::MemProfRecord MR; - MR.CallStack.push_back({0x123, 1, 2, false}); - MR.CallStack.push_back({0x345, 3, 4, true}); - Writer2.addRecord(MR, Err); + const MemProfRecord MR = makeRecord( + /*AllocFrames=*/ + { + {{0x123, 1, 2, false}, {0x345, 3, 4, true}}, + {{0x125, 5, 6, false}, {0x567, 7, 8, true}}, + }, + /*CallSiteFrames=*/{ + {{0x124, 5, 6, false}, {0x789, 8, 9, true}}, + }); + Writer2.addRecord(/*Id=*/0x9999, MR, Err); ASSERT_THAT_ERROR(Writer.mergeProfileKind(Writer2.getProfileKind()), Succeeded()); @@ -264,25 +294,13 @@ ASSERT_EQ(1U, R->Counts.size()); ASSERT_EQ(42U, R->Counts[0]); - auto RecordsOr = Reader->getMemProfRecord(0x123); + auto RecordsOr = Reader->getMemProfRecord(0x9999); ASSERT_THAT_ERROR(RecordsOr.takeError(), Succeeded()); const auto Records = RecordsOr.get(); ASSERT_EQ(Records.size(), 1U); EXPECT_EQ(Records[0], MR); } -TEST_F(InstrProfTest, test_memprof_invalid_add_record) { - llvm::memprof::MemProfRecord MR; - // At least one of the frames should be a non-inline frame. 
- MR.CallStack.push_back({0x123, 1, 2, true}); - MR.CallStack.push_back({0x345, 3, 4, true}); - - auto CheckErr = [](Error &&E) { - EXPECT_TRUE(ErrorEquals(instrprof_error::invalid_prof, std::move(E))); - }; - Writer.addRecord(MR, CheckErr); -} - static const char callee1[] = "callee1"; static const char callee2[] = "callee2"; static const char callee3[] = "callee3"; diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -4,6 +4,7 @@ #include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Value.h" #include "llvm/Object/ObjectFile.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/MemProfData.inc" @@ -133,6 +134,13 @@ TEST(MemProf, FillsValue) { std::unique_ptr Symbolizer(new MockSymbolizer()); + EXPECT_CALL(*Symbolizer, symbolizeInlinedCode(SectionedAddress{0x1000}, + specifier(), false)) + .Times(1) // Only once since we remember invalid PCs. + .WillRepeatedly(Return(makeInliningInfo({ + {"new", 70, 57, 3, "memprof/memprof_new_delete.cpp"}, + }))); + EXPECT_CALL(*Symbolizer, symbolizeInlinedCode(SectionedAddress{0x2000}, specifier(), false)) .Times(1) // Only once since we cache the result for future lookups. @@ -141,41 +149,98 @@ {"bar", 201, 150, 20}, }))); - EXPECT_CALL(*Symbolizer, symbolizeInlinedCode(SectionedAddress{0x6000}, + EXPECT_CALL(*Symbolizer, symbolizeInlinedCode(SectionedAddress{0x3000}, specifier(), false)) .Times(1) .WillRepeatedly(Return(makeInliningInfo({ - {"baz", 10, 5, 30}, - {"qux.llvm.12345", 75, 70, 10}, + {"xyz", 10, 5, 30}, + {"abc", 10, 5, 30}, }))); CallStackMap CSM; - CSM[0x1] = {0x2000}; - CSM[0x2] = {0x6000, 0x2000}; + CSM[0x1] = {0x1000, 0x2000, 0x3000}; llvm::MapVector Prof; Prof[0x1].AllocCount = 1; - Prof[0x2].AllocCount = 2; auto Seg = makeSegments(); RawMemProfReader Reader(std::move(Symbolizer), Seg, Prof, CSM); - std::vector Records; - for (const MemProfRecord &R : Reader) { - Records.push_back(R); + llvm::DenseMap Records; + for (const auto &Pair : Reader) { + Records.insert({Pair.first, Pair.second}); } - EXPECT_EQ(Records.size(), 2U); - - EXPECT_EQ(Records[0].Info.getAllocCount(), 1U); - EXPECT_EQ(Records[1].Info.getAllocCount(), 2U); - EXPECT_THAT(Records[0].CallStack[0], FrameContains("foo", 5U, 30U, true)); - EXPECT_THAT(Records[0].CallStack[1], FrameContains("bar", 51U, 20U, false)); - EXPECT_THAT(Records[1].CallStack[0], FrameContains("baz", 5U, 30U, true)); - EXPECT_THAT(Records[1].CallStack[1], FrameContains("qux", 5U, 10U, false)); - EXPECT_THAT(Records[1].CallStack[2], FrameContains("foo", 5U, 30U, true)); - EXPECT_THAT(Records[1].CallStack[3], FrameContains("bar", 51U, 20U, false)); + // Mock program psuedocode and expected memprof record contents. + // + // AllocSite CallSite + // inline foo() { new(); } Y N + // bar() { foo(); } Y Y + // inline xyz() { bar(); } N Y + // abc() { xyz(); } N Y + + // We expect 4 records. We attach alloc site data to foo and bar, i.e. + // all frames bottom up until we find a non-inline frame. We attach call site + // data to bar, xyz and abc. + ASSERT_EQ(Records.size(), 4U); + + // Check the memprof record for foo. 
+ const llvm::GlobalValue::GUID FooId = MemProfRecord::getGUID("foo"); + ASSERT_EQ(Records.count(FooId), 1U); + const MemProfRecord &Foo = Records[FooId]; + ASSERT_EQ(Foo.AllocSites.size(), 1U); + EXPECT_EQ(Foo.AllocSites[0].Info.getAllocCount(), 1U); + EXPECT_THAT(Foo.AllocSites[0].CallStack[0], + FrameContains("foo", 5U, 30U, true)); + EXPECT_THAT(Foo.AllocSites[0].CallStack[1], + FrameContains("bar", 51U, 20U, false)); + EXPECT_THAT(Foo.AllocSites[0].CallStack[2], + FrameContains("xyz", 5U, 30U, true)); + EXPECT_THAT(Foo.AllocSites[0].CallStack[3], + FrameContains("abc", 5U, 30U, false)); + EXPECT_TRUE(Foo.CallSites.empty()); + + // Check the memprof record for bar. + const llvm::GlobalValue::GUID BarId = MemProfRecord::getGUID("bar"); + ASSERT_EQ(Records.count(BarId), 1U); + const MemProfRecord &Bar = Records[BarId]; + ASSERT_EQ(Bar.AllocSites.size(), 1U); + EXPECT_EQ(Bar.AllocSites[0].Info.getAllocCount(), 1U); + EXPECT_THAT(Bar.AllocSites[0].CallStack[0], + FrameContains("foo", 5U, 30U, true)); + EXPECT_THAT(Bar.AllocSites[0].CallStack[1], + FrameContains("bar", 51U, 20U, false)); + EXPECT_THAT(Bar.AllocSites[0].CallStack[2], + FrameContains("xyz", 5U, 30U, true)); + EXPECT_THAT(Bar.AllocSites[0].CallStack[3], + FrameContains("abc", 5U, 30U, false)); + + ASSERT_EQ(Bar.CallSites.size(), 1U); + ASSERT_EQ(Bar.CallSites[0].size(), 2U); + EXPECT_THAT(Bar.CallSites[0][0], FrameContains("foo", 5U, 30U, true)); + EXPECT_THAT(Bar.CallSites[0][1], FrameContains("bar", 51U, 20U, false)); + + // Check the memprof record for xyz. + const llvm::GlobalValue::GUID XyzId = MemProfRecord::getGUID("xyz"); + ASSERT_EQ(Records.count(XyzId), 1U); + const MemProfRecord &Xyz = Records[XyzId]; + ASSERT_EQ(Xyz.CallSites.size(), 1U); + ASSERT_EQ(Xyz.CallSites[0].size(), 2U); + // Expect the entire frame even though in practice we only need the first + // entry here. + EXPECT_THAT(Xyz.CallSites[0][0], FrameContains("xyz", 5U, 30U, true)); + EXPECT_THAT(Xyz.CallSites[0][1], FrameContains("abc", 5U, 30U, false)); + + // Check the memprof record for abc. + const llvm::GlobalValue::GUID AbcId = MemProfRecord::getGUID("abc"); + ASSERT_EQ(Records.count(AbcId), 1U); + const MemProfRecord &Abc = Records[AbcId]; + EXPECT_TRUE(Abc.AllocSites.empty()); + ASSERT_EQ(Abc.CallSites.size(), 1U); + ASSERT_EQ(Abc.CallSites[0].size(), 2U); + EXPECT_THAT(Abc.CallSites[0][0], FrameContains("xyz", 5U, 30U, true)); + EXPECT_THAT(Abc.CallSites[0][1], FrameContains("abc", 5U, 30U, false)); } TEST(MemProf, PortableWrapper) { @@ -206,36 +271,33 @@ TEST(MemProf, RecordSerializationRoundTrip) { const MemProfSchema Schema = getFullSchema(); - llvm::SmallVector Records; - MemProfRecord MR; - MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000, /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3, /*dealloc_cpu=*/4); - MR.Info = PortableMemInfoBlock(Info); - MR.CallStack.push_back({0x123, 1, 2, false}); - MR.CallStack.push_back({0x345, 3, 4, false}); - Records.push_back(MR); + llvm::SmallVector> AllocCallStacks = { + {{0x123, 1, 2, false}, {0x345, 3, 4, false}}, + {{0x123, 1, 2, false}, {0x567, 5, 6, false}}}; - MR.clear(); - MR.Info = PortableMemInfoBlock(Info); - MR.CallStack.push_back({0x567, 5, 6, false}); - MR.CallStack.push_back({0x789, 7, 8, false}); - Records.push_back(MR); + llvm::SmallVector> CallSites = { + {{0x333, 1, 2, false}, {0x777, 3, 4, true}}}; + + MemProfRecord Record; + for (const auto &ACS : AllocCallStacks) { + // Use the same info block for both allocation sites. 
+ Record.AllocSites.emplace_back(ACS, Info); + } + Record.CallSites.assign(CallSites); std::string Buffer; llvm::raw_string_ostream OS(Buffer); - serializeRecords(Records, Schema, OS); + Record.serialize(Schema, OS); OS.flush(); - const llvm::SmallVector GotRecords = deserializeRecords( + const MemProfRecord GotRecord = MemProfRecord::deserialize( Schema, reinterpret_cast(Buffer.data())); - ASSERT_TRUE(!GotRecords.empty()); - EXPECT_EQ(GotRecords.size(), Records.size()); - EXPECT_THAT(GotRecords[0], EqualsRecord(Records[0])); - EXPECT_THAT(GotRecords[1], EqualsRecord(Records[1])); + EXPECT_THAT(GotRecord, EqualsRecord(Record)); } TEST(MemProf, SymbolizationFilter) { @@ -283,12 +345,15 @@ RawMemProfReader Reader(std::move(Symbolizer), Seg, Prof, CSM); - std::vector Records; - for (const MemProfRecord &R : Reader) { - Records.push_back(R); + llvm::SmallVector Records; + for (const auto &KeyRecordPair : Reader) { + Records.push_back(KeyRecordPair.second); } + ASSERT_EQ(Records.size(), 1U); - ASSERT_EQ(Records[0].CallStack.size(), 1U); - EXPECT_THAT(Records[0].CallStack[0], FrameContains("foo", 5U, 30U, false)); + ASSERT_EQ(Records[0].AllocSites.size(), 1U); + ASSERT_EQ(Records[0].AllocSites[0].CallStack.size(), 1U); + EXPECT_THAT(Records[0].AllocSites[0].CallStack[0], + FrameContains("foo", 5U, 30U, false)); } } // namespace
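
Usage sketch (illustrative only, not part of the patch): the snippet below shows how a consumer might drive the reworked RawMemProfReader, which after this change iterates over (function GUID, MemProfRecord) pairs rather than flat MemProfRecords, and how the per-function AllocSites and CallSites introduced above could be walked. It assumes create() returns Expected<std::unique_ptr<RawMemProfReader>> (the template arguments are elided in the header text above), and getAllocCount() on PortableMemInfoBlock is taken from the unit tests in this patch; error handling is abbreviated.

#include "llvm/ADT/Twine.h"
#include "llvm/ProfileData/RawMemProfReader.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Dump the per-function memprof records parsed from a raw profile.
static Error dumpMemProf(const Twine &RawProfilePath, StringRef ProfiledBinary) {
  // Assumption: create() yields Expected<std::unique_ptr<RawMemProfReader>>.
  auto ReaderOr = memprof::RawMemProfReader::create(RawProfilePath, ProfiledBinary);
  if (!ReaderOr)
    return ReaderOr.takeError();
  std::unique_ptr<memprof::RawMemProfReader> Reader = std::move(*ReaderOr);

  // Each element is a GuidMemProfRecordPair: the canonicalized function GUID
  // (see MemProfRecord::getGUID) and the record holding that function's
  // allocation-site and call-site data.
  for (const auto &[Id, Record] : *Reader) {
    outs() << "Function GUID: " << Id << "\n";
    for (const auto &Alloc : Record.AllocSites)
      outs() << "  alloc site: count=" << Alloc.Info.getAllocCount()
             << " frames=" << Alloc.CallStack.size() << "\n";
    // CallSites holds one inline-frame sequence per call site, leaf to root.
    outs() << "  call sites: " << Record.CallSites.size() << "\n";
  }
  return Error::success();
}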