diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc --- a/compiler-rt/include/profile/InstrProfData.inc +++ b/compiler-rt/include/profile/InstrProfData.inc @@ -650,7 +650,7 @@ /* Raw profile format version (start from 1). */ #define INSTR_PROF_RAW_VERSION 8 /* Indexed profile format version (start from 1). */ -#define INSTR_PROF_INDEX_VERSION 7 +#define INSTR_PROF_INDEX_VERSION 8 /* Coverage mapping format version (start from 0). */ #define INSTR_PROF_COVMAP_VERSION 5 @@ -662,6 +662,7 @@ * The 59th bit indicates whether to use debug info to correlate profiles. * The 60th bit indicates single byte coverage instrumentation. * The 61st bit indicates function entry instrumentation only. + * The 62nd bit indicates whether memory profile information is present. */ #define VARIANT_MASKS_ALL 0xff00000000000000ULL #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL) @@ -671,6 +672,7 @@ #define VARIANT_MASK_DBG_CORRELATE (0x1ULL << 59) #define VARIANT_MASK_BYTE_COVERAGE (0x1ULL << 60) #define VARIANT_MASK_FUNCTION_ENTRY_ONLY (0x1ULL << 61) +#define VARIANT_MASK_MEMPROF (0x1ULL << 62) #define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version #define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime #define INSTR_PROF_PROFILE_COUNTER_BIAS_VAR __llvm_profile_counter_bias diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -287,7 +287,8 @@ CS = 0x8, // A context sensitive IR-level profile. SingleByteCoverage = 0x10, // Use single byte probes for coverage. FunctionEntryOnly = 0x20, // Only instrument the function entry basic block. - LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionEntryOnly) + MemProf = 0x40, // A memory profile collected using -fmemory-profile. 
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/MemProf) }; const std::error_category &instrprof_category(); @@ -1011,7 +1012,9 @@ Version6 = 6, // An additional counter is added around logical operators. Version7 = 7, - // The current version is 7. + // An additional (optional) memory profile type is added. + Version8 = 8, + // The current version is 8. CurrentVersion = INSTR_PROF_INDEX_VERSION }; const uint64_t Version = ProfVersion::CurrentVersion; @@ -1028,6 +1031,7 @@ uint64_t Unused; // Becomes unused since version 4 uint64_t HashType; uint64_t HashOffset; + uint64_t MemProfOffset; // New fields should only be added at the end to ensure that the size // computation is correct. The methods below need to be updated to ensure that // the new field is read correctly. diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -650,7 +650,7 @@ /* Raw profile format version (start from 1). */ #define INSTR_PROF_RAW_VERSION 8 /* Indexed profile format version (start from 1). */ -#define INSTR_PROF_INDEX_VERSION 7 +#define INSTR_PROF_INDEX_VERSION 8 /* Coverage mapping format version (start from 0). */ #define INSTR_PROF_COVMAP_VERSION 5 @@ -662,6 +662,7 @@ * The 59th bit indicates whether to use debug info to correlate profiles. * The 60th bit indicates single byte coverage instrumentation. * The 61st bit indicates function entry instrumentation only. + * The 62nd bit indicates whether memory profile information is present. 
*/ #define VARIANT_MASKS_ALL 0xff00000000000000ULL #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL) @@ -671,6 +672,7 @@ #define VARIANT_MASK_DBG_CORRELATE (0x1ULL << 59) #define VARIANT_MASK_BYTE_COVERAGE (0x1ULL << 60) #define VARIANT_MASK_FUNCTION_ENTRY_ONLY (0x1ULL << 61) +#define VARIANT_MASK_MEMPROF (0x1ULL << 62) #define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version #define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime #define INSTR_PROF_PROFILE_COUNTER_BIAS_VAR __llvm_profile_counter_bias diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -19,6 +19,7 @@ #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/InstrProfCorrelator.h" +#include "llvm/ProfileData/MemProf.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/LineIterator.h" @@ -471,6 +472,9 @@ using OnDiskHashTableImplV3 = OnDiskIterableChainedHashTable; +using MemProfHashTable = + OnDiskIterableChainedHashTable; + template class InstrProfReaderItaniumRemapper; @@ -556,6 +560,11 @@ std::unique_ptr Summary; /// Context sensitive profile summary data. std::unique_ptr CS_Summary; + /// MemProf profile schema (if available). + memprof::MemProfSchema Schema; + /// MemProf profile data on-disk indexed via llvm::md5(FunctionName). + std::unique_ptr MemProfTable; + // Index to the current record in the record array. unsigned RecordIndex; @@ -609,6 +618,11 @@ Expected getInstrProfRecord(StringRef FuncName, uint64_t FuncHash); + /// Return the memprof records for the function identified by + /// llvm::md5(Name). + Expected> + getMemProfRecord(uint64_t FuncNameHash); + /// Fill Counts with the profile data for the given function name. 
Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash, std::vector &Counts); diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -17,6 +17,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/MemProf.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" @@ -37,6 +38,11 @@ private: bool Sparse; StringMap FunctionData; + + // A map to hold memprof data per function. The lower 64 bits obtained from + // the md5 hash of the function name is used to index into the map. + memprof::FunctionMemProfMap MemProfData; + // An enum describing the attributes of the profile. InstrProfKind ProfileKind = InstrProfKind::Unknown; // Use raw pointer here for the incomplete type object. @@ -57,6 +63,9 @@ addRecord(std::move(I), 1, Warn); } + void addRecord(const ::llvm::memprof::MemProfRecord &MR, + function_ref Warn); + /// Merge existing function counts from the given writer. void mergeRecordsFromWriter(InstrProfWriter &&IPW, function_ref Warn); @@ -112,6 +121,8 @@ return Error::success(); } + InstrProfKind getProfileKind() const { return ProfileKind; } + // Internal interface for testing purpose only. 
void setValueProfDataEndianness(support::endianness Endianness); void setOutputSparse(bool Sparse); diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -5,6 +5,7 @@ #include #include +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ProfileData/MemProfData.inc" #include "llvm/ProfileData/ProfileCommon.h" @@ -134,18 +135,52 @@ }; struct MemProfRecord { - struct Frame { - std::string Function; + // Describes a call frame for a dynamic allocation context. The contents of + // the frame are populated by symbolizing the stack depot call frame from the + // compiler runtime. + PACKED(struct Frame { + // A uuid (uint64_t) identifying the function. It is obtained by + // llvm::md5(FunctionName) which returns the lower 64 bits. + GlobalValue::GUID Function; + // The source line offset of the call from the beginning of parent function. uint32_t LineOffset; + // The source column number of the call to help distinguish multiple calls + // on the same line. uint32_t Column; + // Whether the current frame is inlined. bool IsInlineFrame; - Frame(std::string Str, uint32_t Off, uint32_t Col, bool Inline) - : Function(std::move(Str)), LineOffset(Off), Column(Col), - IsInlineFrame(Inline) {} - }; + Frame(uint64_t Hash, uint32_t Off, uint32_t Col, bool Inline) + : Function(Hash), LineOffset(Off), Column(Col), IsInlineFrame(Inline) {} + bool operator==(const Frame &Other) const { + return Other.Function == Function && Other.LineOffset == LineOffset && + Other.Column == Column && Other.IsInlineFrame == IsInlineFrame; + } + + bool operator!=(const Frame &Other) const { return !operator==(Other); } + + // Write the contents of the frame to the ostream \p OS. 
+ void write(raw_ostream & OS) const { + using namespace support; + + endian::Writer LE(OS, little); + + // If the type of the GlobalValue::GUID changes, then we need to update + // the reader and the writer. + static_assert(std::is_same::value, + "Expect GUID to be uint64_t."); + LE.write(Function); + + LE.write(LineOffset); + LE.write(Column); + LE.write(IsInlineFrame); + } + }); + + // The dynamic calling context for the allocation. std::vector CallStack; + // The statistics obtained from the runtime for the allocation. PortableMemInfoBlock Info; void clear() { @@ -153,6 +188,12 @@ Info.clear(); } + size_t serializedSize() const { + return sizeof(uint64_t) + // The number of frames to serialize. + sizeof(Frame) * CallStack.size() + // The contents of the frames. + PortableMemInfoBlock::serializedSize(); // The size of the payload. + } + // Prints out the contents of the memprof record in YAML. void print(llvm::raw_ostream &OS) const { OS << " Callstack:\n"; @@ -168,6 +209,138 @@ Info.printYAML(OS); } + + bool operator==(const MemProfRecord &Other) const { + if (Other.Info != Info) + return false; + + if (Other.CallStack.size() != CallStack.size()) + return false; + + for (size_t I = 0; I < Other.CallStack.size(); I++) { + if (Other.CallStack[I] != CallStack[I]) + return false; + } + return true; + } +}; + +// Serializes the memprof records in \p Records to the ostream \p OS based on +// the schema provided in \p Schema. +void serializeRecords(const ArrayRef Records, + const MemProfSchema &Schema, raw_ostream &OS); + +// Deserializes memprof records from the Buffer +SmallVector deserializeRecords(const MemProfSchema &Schema, + const unsigned char *Buffer); + +// Reads a memprof schema from a buffer. All entries in the buffer are +// interpreted as uint64_t. The first entry in the buffer denotes the number of +// ids in the schema. Subsequent entries are integers which map to memprof::Meta +// enum class entries. 
After successfully reading the schema, the pointer is one +// byte past the schema contents. +Expected readMemProfSchema(const unsigned char *&Buffer); + +using FunctionMemProfMap = + DenseMap>; + +/// Trait for lookups into the on-disk hash table for memprof format in the +/// indexed profile. +class MemProfRecordLookupTrait { +public: + using data_type = ArrayRef; + using internal_key_type = uint64_t; + using external_key_type = uint64_t; + using hash_value_type = uint64_t; + using offset_type = uint64_t; + + MemProfRecordLookupTrait() = delete; + MemProfRecordLookupTrait(const MemProfSchema &S) : Schema(S) {} + + static bool EqualKey(uint64_t A, uint64_t B) { return A == B; } + static uint64_t GetInternalKey(uint64_t K) { return K; } + static uint64_t GetExternalKey(uint64_t K) { return K; } + + hash_value_type ComputeHash(uint64_t K) { return K; } + + static std::pair + ReadKeyDataLength(const unsigned char *&D) { + using namespace support; + + offset_type KeyLen = endian::readNext(D); + offset_type DataLen = endian::readNext(D); + return std::make_pair(KeyLen, DataLen); + } + + uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) { + using namespace support; + return endian::readNext(D); + } + + data_type ReadData(uint64_t K, const unsigned char *D, + offset_type /*Unused*/) { + Records = deserializeRecords(Schema, D); + return Records; + } + +private: + // Holds the memprof schema used to deserialize records. + MemProfSchema Schema; + // Holds the records from one function deserialized from the indexed format. + llvm::SmallVector Records; +}; + +class MemProfRecordWriterTrait { +public: + using key_type = uint64_t; + using key_type_ref = uint64_t; + + using data_type = ArrayRef; + using data_type_ref = ArrayRef; + + using hash_value_type = uint64_t; + using offset_type = uint64_t; + + // Pointer to the memprof schema to use for the generator. 
Unlike the reader + // we must use a default constructor with no params for the writer trait so we + // have a public member which must be initialized by the user. + MemProfSchema *Schema = nullptr; + + MemProfRecordWriterTrait() = default; + + static hash_value_type ComputeHash(key_type_ref K) { return K; } + + static std::pair + EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { + using namespace support; + + endian::Writer LE(Out, little); + + offset_type N = sizeof(K); + LE.write(N); + + offset_type M = 0; + + M += sizeof(uint64_t); + for (const auto &Record : V) { + M += Record.serializedSize(); + } + + LE.write(M); + return std::make_pair(N, M); + } + + void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) { + using namespace support; + endian::Writer LE(Out, little); + LE.write(K); + } + + void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V, + offset_type /*Unused*/) { + assert(Schema != nullptr && "MemProf schema is not initialized!"); + serializeRecords(V, *Schema, Out); + } }; } // namespace memprof diff --git a/llvm/include/llvm/ProfileData/MemProfData.inc b/llvm/include/llvm/ProfileData/MemProfData.inc --- a/llvm/include/llvm/ProfileData/MemProfData.inc +++ b/llvm/include/llvm/ProfileData/MemProfData.inc @@ -1,5 +1,5 @@ -#ifndef LLVM_PROFILEDATA_MEMPROFDATA_INC -#define LLVM_PROFILEDATA_MEMPROFDATA_INC +#ifndef MEMPROF_DATA_INC +#define MEMPROF_DATA_INC /*===-- MemProfData.inc - MemProf profiling runtime structures -*- C++ -*-=== *\ |* |* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. diff --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h --- a/llvm/include/llvm/ProfileData/RawMemProfReader.h +++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h @@ -66,6 +66,9 @@ return Iterator(this); } + // The RawMemProfReader only holds memory profile information. 
+ InstrProfKind getProfileKind() const { return InstrProfKind::MemProf; } + // Constructor for unittests only. RawMemProfReader(std::unique_ptr Sym, llvm::SmallVectorImpl &Seg, diff --git a/llvm/lib/ProfileData/CMakeLists.txt b/llvm/lib/ProfileData/CMakeLists.txt --- a/llvm/lib/ProfileData/CMakeLists.txt +++ b/llvm/lib/ProfileData/CMakeLists.txt @@ -4,6 +4,7 @@ InstrProfCorrelator.cpp InstrProfReader.cpp InstrProfWriter.cpp + MemProf.cpp ProfileSummaryBuilder.cpp SampleProf.cpp SampleProfReader.cpp diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -1345,8 +1345,15 @@ return make_error(instrprof_error::unsupported_version); switch (GET_VERSION(FormatVersion)) { - // When a new field is added in the header add a case statement here to - // populate it. + // When a new field is added in the header add a case statement here to + // populate it. + static_assert( + IndexedInstrProf::ProfVersion::CurrentVersion == Version8, + "Please update the reading code below if a new field has been added, " + "if not add a case statement to fall through to the latest version."); + case 8ull: + H.MemProfOffset = read(Buffer, offsetOf(&Header::MemProfOffset)); + LLVM_FALLTHROUGH; default: H.HashType = read(Buffer, offsetOf(&Header::HashType)); H.HashOffset = read(Buffer, offsetOf(&Header::HashOffset)); @@ -1357,9 +1364,15 @@ size_t Header::size() const { switch (GET_VERSION(Version)) { - // When a new field is added to the header add a case statement here to - // compute the size as offset of the new field + size of the new field. This - // relies on the field being added to the end of the list. + // When a new field is added to the header add a case statement here to + // compute the size as offset of the new field + size of the new field. This + // relies on the field being added to the end of the list. 
+ static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version8, + "Please update the size computation below if a new field has " + "been added to the header, if not add a case statement to " + "fall through to the latest version."); + case 8ull: + return offsetOf(&Header::MemProfOffset) + sizeof(Header::MemProfOffset); default: return offsetOf(&Header::HashOffset) + sizeof(Header::HashOffset); } diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -19,7 +19,9 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/ProfileCommon.h" +#include "llvm/ProfileData/RawMemProfReader.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorOr.h" @@ -57,6 +59,9 @@ if (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) { ProfileKind |= InstrProfKind::FunctionEntryOnly; } + if (Version & VARIANT_MASK_MEMPROF) { + ProfileKind |= InstrProfKind::MemProf; + } return ProfileKind; } @@ -955,10 +960,35 @@ uint64_t HashOffset = endian::byte_swap(Header->HashOffset); - // The rest of the file is an on disk hash table. + // The hash table with profile counts comes next. auto IndexPtr = std::make_unique>( Start + HashOffset, Cur, Start, HashType, Header->Version); + // The MemProfOffset field in the header is only valid when the format version + // is 8 or higher (the version that introduced it). + if (GET_VERSION(Header->Version) >= 8 && + Header->Version & VARIANT_MASK_MEMPROF) { + uint64_t MemProfOffset = + endian::byte_swap(Header->MemProfOffset); + + const unsigned char *Ptr = Start + MemProfOffset; + // The value returned from Generator.Emit. + const uint64_t TableOffset = + support::endian::readNext(Ptr); + + // Read the schema. 
+ auto SchemaOr = memprof::readMemProfSchema(Ptr); + if (!SchemaOr) + return SchemaOr.takeError(); + Schema = SchemaOr.get(); + + // Now initialize the table reader with a pointer into data buffer. + MemProfTable.reset(MemProfHashTable::Create( + /*Buckets=*/Start + TableOffset, + /*Payload=*/Ptr, + /*Base=*/Start, memprof::MemProfRecordLookupTrait(Schema))); + } + // Load the remapping table now if requested. if (RemappingBuffer) { Remapper = std::make_unique< @@ -1003,6 +1033,17 @@ return error(instrprof_error::hash_mismatch); } +Expected> +IndexedInstrProfReader::getMemProfRecord(uint64_t FuncNameHash) { + auto Iter = MemProfTable->find(FuncNameHash); + if (Iter == MemProfTable->end()) + // TODO: Add memprof specific errors. + return make_error(instrprof_error::hash_mismatch, + "memprof record not found for hash " + + Twine(FuncNameHash)); + return *Iter; +} + Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, uint64_t FuncHash, std::vector &Counts) { diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" @@ -63,11 +64,16 @@ if (IsFDOStream) { raw_fd_ostream &FDOStream = static_cast(OS); + const uint64_t LastPos = FDOStream.tell(); for (int K = 0; K < NItems; K++) { FDOStream.seek(P[K].Pos); for (int I = 0; I < P[K].N; I++) write(P[K].D[I]); } + // Reset the stream to the last position after patching so that users + // don't accidentally overwrite data. This makes it consistent with + // the string stream below which replaces the data directly. 
+ FDOStream.seek(LastPos); } else { raw_string_ostream &SOStream = static_cast(OS); std::string &Data = SOStream.str(); // with flush @@ -248,11 +254,39 @@ Dest.sortValueData(); } +void InstrProfWriter::addRecord(const memprof::MemProfRecord &MR, + function_ref Warn) { + // Use 0 as a sentinel value since its highly unlikely that the lower 64-bits + // of a 128 bit md5 hash will be all zeros. + // TODO: Move this Key frame detection to the contructor to avoid having to + // scan all the callstacks again when adding a new record. + uint64_t Key = 0; + for (auto Iter = MR.CallStack.rbegin(), End = MR.CallStack.rend(); + Iter != End; Iter++) { + if (!Iter->IsInlineFrame) { + Key = Iter->Function; + break; + } + } + + if (Key == 0) { + Warn(make_error( + instrprof_error::invalid_prof, + "could not determine leaf function for memprof record.")); + } + + MemProfData[Key].push_back(MR); +} + void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW, function_ref Warn) { for (auto &I : IPW.FunctionData) for (auto &Func : I.getValue()) addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn); + + for (auto &I : IPW.MemProfData) + for (const auto &MR : I.second) + addRecord(MR, Warn); } bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) { @@ -297,6 +331,7 @@ for (const auto &I : FunctionData) if (shouldEncodeData(I.getValue())) Generator.insert(I.getKey(), &I.getValue()); + // Write the header. 
IndexedInstrProf::Header Header; Header.Magic = IndexedInstrProf::Magic; @@ -311,16 +346,18 @@ Header.Version |= VARIANT_MASK_BYTE_COVERAGE; if (static_cast(ProfileKind & InstrProfKind::FunctionEntryOnly)) Header.Version |= VARIANT_MASK_FUNCTION_ENTRY_ONLY; + if (static_cast(ProfileKind & InstrProfKind::MemProf)) + Header.Version |= VARIANT_MASK_MEMPROF; Header.Unused = 0; Header.HashType = static_cast(IndexedInstrProf::HashType); Header.HashOffset = 0; + Header.MemProfOffset = 0; int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t); - // Only write out all the fields except 'HashOffset'. We need - // to remember the offset of that field to allow back patching - // later. - for (int I = 0; I < N - 1; I++) + // Only write out all the fields except 'HashOffset' and 'MemProfOffset'. We + // need to remember the offset of these fields to allow back patching later. + for (int I = 0; I < N - 2; I++) OS.write(reinterpret_cast(&Header)[I]); // Save the location of Header.HashOffset field in \c OS. @@ -328,6 +365,13 @@ // Reserve the space for HashOffset field. OS.write(0); + // Save the location of MemProf profile data. This is stored in two parts as + // the schema and as a separate on-disk chained hashtable. + uint64_t MemProfSectionOffset = OS.tell(); + // Reserve space for the MemProf table field to be patched later if this + // profile contains memory profile information. + OS.write(0); + // Reserve space to write profile summary data. uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size(); uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries); @@ -347,6 +391,42 @@ // Write the hash table. uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj); + // Write the MemProf profile data if we have it. This includes a simple schema + // with the format described below followed by the hashtable: + // uint64_t Offset = MemProfGenerator.Emit + // uint64_t Num schema entries + // uint64_t Schema entry 0 + // uint64_t Schema entry 1 + // .... 
+ // uint64_t Schema entry N - 1 + // OnDiskChainedHashTable MemProfFunctionData + uint64_t MemProfSectionStart = 0; + if (static_cast(ProfileKind & InstrProfKind::MemProf)) { + MemProfSectionStart = OS.tell(); + OS.write(0ULL); // Reserve space for the offset. + + auto Schema = memprof::PortableMemInfoBlock::getSchema(); + OS.write(static_cast(Schema.size())); + for (const auto Id : Schema) { + OS.write(static_cast(Id)); + } + + auto MemProfWriter = std::make_unique(); + MemProfWriter->Schema = &Schema; + OnDiskChainedHashTableGenerator + MemProfGenerator; + for (const auto &I : MemProfData) { + // Insert the key (func hash) and value (vector of memprof records). + MemProfGenerator.insert(I.first, I.second); + } + + uint64_t TableOffset = MemProfGenerator.Emit(OS.OS, *MemProfWriter); + PatchItem PatchItems[] = { + {MemProfSectionStart, &TableOffset, 1}, + }; + OS.patch(PatchItems, 1); + } + // Allocate space for data to be serialized out. std::unique_ptr TheSummary = IndexedInstrProf::allocSummary(SummarySize); @@ -369,6 +449,8 @@ PatchItem PatchItems[] = { // Patch the Header.HashOffset field. {HashTableStartFieldOffset, &HashTableStart, 1}, + // Patch the Header.MemProfOffset (=0 for profiles without MemProf data). + {MemProfSectionOffset, &MemProfSectionStart, 1}, // Patch the summary data. 
{SummaryOffset, reinterpret_cast(TheSummary.get()), (int)(SummarySize / sizeof(uint64_t))}, diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -0,0 +1,73 @@ +#include "llvm/ProfileData/MemProf.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/EndianStream.h" + +namespace llvm { +namespace memprof { + +void serializeRecords(const ArrayRef Records, + const MemProfSchema &Schema, raw_ostream &OS) { + using namespace support; + + endian::Writer LE(OS, little); + + LE.write(Records.size()); + for (const MemProfRecord &MR : Records) { + LE.write(MR.CallStack.size()); + for (const MemProfRecord::Frame &F : MR.CallStack) { + F.write(OS); + } + MR.Info.serialize(Schema, OS); + } +} + +SmallVector deserializeRecords(const MemProfSchema &Schema, + const unsigned char *Ptr) { + using namespace support; + + SmallVector Records; + const uint64_t NumRecords = + endian::readNext(Ptr); + for (uint64_t I = 0; I < NumRecords; I++) { + MemProfRecord MR; + const uint64_t NumFrames = + endian::readNext(Ptr); + for (uint64_t J = 0; J < NumFrames; J++) { + const auto F = *reinterpret_cast(Ptr); + Ptr += sizeof(MemProfRecord::Frame); + MR.CallStack.push_back(F); + } + MR.Info.deserialize(Schema, Ptr); + Ptr += PortableMemInfoBlock::serializedSize(); + Records.push_back(MR); + } + return Records; +} + +Expected readMemProfSchema(const unsigned char *&Buffer) { + using namespace support; + + const unsigned char *Ptr = Buffer; + const uint64_t NumSchemaIds = + endian::readNext(Ptr); + if (NumSchemaIds > static_cast(Meta::Size)) { + return make_error(instrprof_error::malformed, + "memprof schema invalid"); + } + + MemProfSchema Result; + for (size_t I = 0; I < NumSchemaIds; I++) { + const uint64_t Tag = endian::readNext(Ptr); + if (Tag >= static_cast(Meta::Size)) { + return make_error(instrprof_error::malformed, + "memprof schema invalid"); + } + 
Result.push_back(static_cast(Tag)); + } + // Advance the buffer to one past the schema if we succeeded. + Buffer = Ptr; + return Result; +} + +} // namespace memprof +} // namespace llvm diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp --- a/llvm/lib/ProfileData/RawMemProfReader.cpp +++ b/llvm/lib/ProfileData/RawMemProfReader.cpp @@ -362,7 +362,12 @@ for (size_t I = 0; I < DI.getNumberOfFrames(); I++) { const auto &Frame = DI.getFrame(I); Record.CallStack.emplace_back( - std::to_string(llvm::MD5Hash(trimSuffix(Frame.FunctionName))), + // We use the function guid which we expect to be a uint64_t. At this + // time, it is the lower 64 bits of the md5 of the function name. Any + // suffix with .llvm. is trimmed since these are added by ThinLTO + // global promotion. At the time the profile is consumed, these + // suffixes will not be present. + Function::getGUID(trimSuffix(Frame.FunctionName)), Frame.Line - Frame.StartLine, Frame.Column, // Only the first entry is not an inlined location. I != 0); diff --git a/llvm/test/tools/llvm-profdata/Inputs/basic.profraw b/llvm/test/tools/llvm-profdata/Inputs/basic.profraw new file mode 100644 index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@ +#include <stdlib.h> +#include <string.h> +int main(int argc, char **argv) { + char *x = (char *)malloc(10); + memset(x, 0, 10); + free(x); + x = (char *)malloc(10); + memset(x, 0, 10); + free(x); + return 0; +} +``` + +Steps to collect the memprof raw profile and the instrprof raw profile: + +``` +# Collect instrprof profile. +clang -fprofile-generate source.c -o instr.out +./instr.out +mv *.profraw basic.profraw + +# Collect memprof profile. 
+clang -fuse-ld=lld -Wl,--no-rosegment -gmlt -fdebug-info-for-profiling \ + -fmemory-profile -mno-omit-leaf-frame-pointer -fno-omit-frame-pointer \ + -fno-optimize-sibling-calls -m64 -Wl,-build-id source.c -o basic.memprofexe + +env MEMPROF_OPTIONS=log_path=stdout ./basic.memprofexe > basic.memprofraw +``` + +RUN: llvm-profdata merge %p/Inputs/basic.profraw %p/Inputs/basic.memprofraw --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof +RUN: llvm-profdata show %t.prof | FileCheck %s + +For now we only check the validity of the instrumented profile since we don't +have a way to display the contents of the memprof indexed format yet. + +CHECK: Instrumentation level: IR entry_first = 0 +CHECK: Total functions: 1 +CHECK: Maximum function count: 1 +CHECK: Maximum internal block count: 0 + + diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -238,7 +238,7 @@ /// Load an input into a writer context. static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, const InstrProfCorrelator *Correlator, - WriterContext *WC) { + const StringRef ProfiledBinary, WriterContext *WC) { std::unique_lock CtxGuard{WC->Lock}; // Copy the filename, because llvm::ThreadPool copied the input "const @@ -246,6 +246,35 @@ // invalid outside of this packaged task. std::string Filename = Input.Filename; + using ::llvm::memprof::RawMemProfReader; + if (RawMemProfReader::hasFormat(Input.Filename)) { + auto ReaderOrErr = RawMemProfReader::create(Input.Filename, ProfiledBinary); + if (!ReaderOrErr) { + exitWithError(ReaderOrErr.takeError(), Input.Filename); + } + std::unique_ptr Reader = std::move(ReaderOrErr.get()); + // Check if the profile types can be merged, e.g. clang frontend profiles + // should not be merged with memprof profiles. 
+ if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) { + consumeError(std::move(E)); + WC->Errors.emplace_back( + make_error( + "Cannot merge MemProf profile with Clang generated profile.", + std::error_code()), + Filename); + return; + } + + // Add the records into the writer context. + for (const memprof::MemProfRecord &MR : *Reader) { + WC->Writer.addRecord(MR, [&](Error E) { + instrprof_error IPE = InstrProfError::take(std::move(E)); + WC->Errors.emplace_back(make_error(IPE), Filename); + }); + } + return; + } + auto ReaderOrErr = InstrProfReader::create(Input.Filename, Correlator); if (Error E = ReaderOrErr.takeError()) { // Skip the empty profiles by returning sliently. @@ -331,7 +360,8 @@ SymbolRemapper *Remapper, StringRef OutputFilename, ProfileFormat OutputFormat, bool OutputSparse, - unsigned NumThreads, FailureMode FailMode) { + unsigned NumThreads, FailureMode FailMode, + const StringRef ProfiledBinary) { if (OutputFormat != PF_Binary && OutputFormat != PF_Compact_Binary && OutputFormat != PF_Ext_Binary && OutputFormat != PF_Text) exitWithError("unknown format is specified"); @@ -364,14 +394,15 @@ if (NumThreads == 1) { for (const auto &Input : Inputs) - loadInput(Input, Remapper, Correlator.get(), Contexts[0].get()); + loadInput(Input, Remapper, Correlator.get(), ProfiledBinary, + Contexts[0].get()); } else { ThreadPool Pool(hardware_concurrency(NumThreads)); // Load the inputs in parallel (N/NumThreads serial steps). 
unsigned Ctx = 0; for (const auto &Input : Inputs) { - Pool.async(loadInput, Input, Remapper, Correlator.get(), + Pool.async(loadInput, Input, Remapper, Correlator.get(), ProfiledBinary, Contexts[Ctx].get()); Ctx = (Ctx + 1) % NumThreads; } @@ -588,7 +619,7 @@ SmallSet WriterErrorCodes; auto WC = std::make_unique(OutputSparse, ErrorLock, WriterErrorCodes); - loadInput(Inputs[0], nullptr, nullptr, WC.get()); + loadInput(Inputs[0], nullptr, nullptr, /*ProfiledBinary=*/"", WC.get()); if (WC->Errors.size() > 0) exitWithError(std::move(WC->Errors[0].first), InstrFilename); @@ -968,6 +999,9 @@ cl::opt DebugInfoFilename( "debug-info", cl::init(""), cl::desc("Use the provided debug info to correlate the raw profile.")); + cl::opt ProfiledBinary( + "profiled-binary", cl::init(""), + cl::desc("Path to binary from which the profile was collected.")); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); @@ -1010,7 +1044,7 @@ if (ProfileKind == instr) mergeInstrProfile(WeightedInputs, DebugInfoFilename, Remapper.get(), OutputFilename, OutputFormat, OutputSparse, NumThreads, - FailureMode); + FailureMode, ProfiledBinary); else mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename, OutputFormat, ProfileSymbolListFile, CompressAllSections, @@ -1041,7 +1075,7 @@ OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n"; exit(0); } - loadInput(WeightedInput, nullptr, nullptr, &Context); + loadInput(WeightedInput, nullptr, nullptr, /*ProfiledBinary=*/"", &Context); overlapInput(BaseFilename, TestFilename, &Context, Overlap, FuncFilter, OS, IsCS); Overlap.dump(OS); diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -12,6 +12,7 @@ #include "llvm/IR/Module.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/InstrProfWriter.h" +#include "llvm/ProfileData/MemProf.h" 
#include "llvm/Support/Compression.h" #include "llvm/Testing/Support/Error.h" #include "llvm/Testing/Support/SupportHelpers.h" @@ -221,6 +222,67 @@ ASSERT_EQ(0U, R->Counts[1]); } +TEST_F(InstrProfTest, test_memprof) { + ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf), + Succeeded()); + llvm::memprof::MemProfRecord MR; + MR.CallStack.push_back({0x123, 1, 2, false}); + MR.CallStack.push_back({0x345, 3, 4, true}); + Writer.addRecord(MR, Err); + + auto Profile = Writer.writeBuffer(); + readProfile(std::move(Profile)); + + auto RecordsOr = Reader->getMemProfRecord(0x123); + ASSERT_THAT_ERROR(RecordsOr.takeError(), Succeeded()); + const auto Records = RecordsOr.get(); + ASSERT_EQ(Records.size(), 1U); + EXPECT_EQ(Records[0], MR); +} + +TEST_F(InstrProfTest, test_memprof_merge) { + Writer.addRecord({"func1", 0x1234, {42}}, Err); + + InstrProfWriter Writer2; + ASSERT_THAT_ERROR(Writer2.mergeProfileKind(InstrProfKind::MemProf), + Succeeded()); + + llvm::memprof::MemProfRecord MR; + MR.CallStack.push_back({0x123, 1, 2, false}); + MR.CallStack.push_back({0x345, 3, 4, true}); + Writer2.addRecord(MR, Err); + + ASSERT_THAT_ERROR(Writer.mergeProfileKind(Writer2.getProfileKind()), + Succeeded()); + Writer.mergeRecordsFromWriter(std::move(Writer2), Err); + + auto Profile = Writer.writeBuffer(); + readProfile(std::move(Profile)); + + Expected R = Reader->getInstrProfRecord("func1", 0x1234); + EXPECT_THAT_ERROR(R.takeError(), Succeeded()); + ASSERT_EQ(1U, R->Counts.size()); + ASSERT_EQ(42U, R->Counts[0]); + + auto RecordsOr = Reader->getMemProfRecord(0x123); + ASSERT_THAT_ERROR(RecordsOr.takeError(), Succeeded()); + const auto Records = RecordsOr.get(); + ASSERT_EQ(Records.size(), 1U); + EXPECT_EQ(Records[0], MR); +} + +TEST_F(InstrProfTest, test_memprof_invalid_add_record) { + llvm::memprof::MemProfRecord MR; + // At least one of the frames should be a non-inline frame. 
+ MR.CallStack.push_back({0x123, 1, 2, true}); + MR.CallStack.push_back({0x345, 3, 4, true}); + + auto CheckErr = [](Error &&E) { + EXPECT_TRUE(ErrorEquals(instrprof_error::invalid_prof, std::move(E))); + }; + Writer.addRecord(MR, CheckErr); +} + static const char callee1[] = "callee1"; static const char callee2[] = "callee2"; static const char callee3[] = "callee3"; diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -89,8 +89,8 @@ DILineInfoSpecifier::FunctionNameKind::LinkageName); } -MATCHER_P4(FrameContains, Function, LineOffset, Column, Inline, "") { - const std::string ExpectedHash = std::to_string(llvm::MD5Hash(Function)); +MATCHER_P4(FrameContains, FunctionName, LineOffset, Column, Inline, "") { + const uint64_t ExpectedHash = llvm::Function::getGUID(FunctionName); if (arg.Function != ExpectedHash) { *result_listener << "Hash mismatch"; return false; @@ -103,6 +103,22 @@ return false; } +MATCHER_P(EqualsRecord, Want, "") { + if (arg == Want) + return true; + + std::string Explanation; + llvm::raw_string_ostream OS(Explanation); + OS << "\n Want: \n"; + Want.print(OS); + OS << "\n Got: \n"; + arg.print(OS); + OS.flush(); + + *result_listener << Explanation; + return false; +} + MemProfSchema getFullSchema() { MemProfSchema Schema; #define MIBEntryDef(NameTag, Name, Type) Schema.push_back(Meta::Name); @@ -179,9 +195,43 @@ // Here we compare directly with the actual counts instead of MemInfoBlock // members. Since the MemInfoBlock struct is packed and the EXPECT_EQ macros // take a reference to the params, this results in unaligned accesses. 
- EXPECT_EQ(1, ReadBlock.getAllocCount()); - EXPECT_EQ(7, ReadBlock.getTotalAccessCount()); - EXPECT_EQ(3, ReadBlock.getAllocCpuId()); + EXPECT_EQ(1UL, ReadBlock.getAllocCount()); + EXPECT_EQ(7ULL, ReadBlock.getTotalAccessCount()); + EXPECT_EQ(3UL, ReadBlock.getAllocCpuId()); } +TEST(MemProf, RecordSerializationRoundTrip) { + const MemProfSchema Schema = getFullSchema(); + + llvm::SmallVector Records; + MemProfRecord MR; + + MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000, + /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3, + /*dealloc_cpu=*/4); + + MR.Info = PortableMemInfoBlock(Info); + MR.CallStack.push_back({0x123, 1, 2, false}); + MR.CallStack.push_back({0x345, 3, 4, false}); + Records.push_back(MR); + + MR.clear(); + MR.Info = PortableMemInfoBlock(Info); + MR.CallStack.push_back({0x567, 5, 6, false}); + MR.CallStack.push_back({0x789, 7, 8, false}); + Records.push_back(MR); + + std::string Buffer; + llvm::raw_string_ostream OS(Buffer); + serializeRecords(Records, Schema, OS); + OS.flush(); + + const llvm::SmallVector GotRecords = deserializeRecords( + Schema, reinterpret_cast(Buffer.data())); + + ASSERT_TRUE(!GotRecords.empty()); + EXPECT_EQ(GotRecords.size(), Records.size()); + EXPECT_THAT(GotRecords[0], EqualsRecord(Records[0])); + EXPECT_THAT(GotRecords[1], EqualsRecord(Records[1])); +} } // namespace