diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc --- a/compiler-rt/include/profile/InstrProfData.inc +++ b/compiler-rt/include/profile/InstrProfData.inc @@ -650,7 +650,7 @@ /* Raw profile format version (start from 1). */ #define INSTR_PROF_RAW_VERSION 8 /* Indexed profile format version (start from 1). */ -#define INSTR_PROF_INDEX_VERSION 7 +#define INSTR_PROF_INDEX_VERSION 8 /* Coverage mapping format version (start from 0). */ #define INSTR_PROF_COVMAP_VERSION 5 diff --git a/llvm/include/llvm/IR/ProfileSummary.h b/llvm/include/llvm/IR/ProfileSummary.h --- a/llvm/include/llvm/IR/ProfileSummary.h +++ b/llvm/include/llvm/IR/ProfileSummary.h @@ -44,7 +44,7 @@ class ProfileSummary { public: - enum Kind { PSK_Instr, PSK_CSInstr, PSK_Sample }; + enum Kind { PSK_Instr, PSK_CSInstr, PSK_Sample, PSK_MemProf }; private: const Kind PSK; diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -287,7 +287,8 @@ CS = 0x8, // A context sensitive IR-level profile. SingleByteCoverage = 0x10, // Use single byte probes for coverage. FunctionEntryOnly = 0x20, // Only instrument the function entry basic block. - LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionEntryOnly) + MemProf = 0x40, // A memory profile collected using -fmemory-profile. + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/MemProf) }; const std::error_category &instrprof_category(); @@ -1011,7 +1012,9 @@ Version6 = 6, // An additional counter is added around logical operators. Version7 = 7, - // The current version is 7. + // An additional (optional) memory profile type is added. + Version8 = 8, + // The current version is 8.
CurrentVersion = INSTR_PROF_INDEX_VERSION }; const uint64_t Version = ProfVersion::CurrentVersion; @@ -1028,6 +1031,7 @@ uint64_t Unused; // Becomes unused since version 4 uint64_t HashType; uint64_t HashOffset; + uint64_t MemProfOffset; // New fields should only be added at the end to ensure that the size // computation is correct. The methods below need to be updated to ensure that // the new field is read correctly. diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -650,7 +650,7 @@ /* Raw profile format version (start from 1). */ #define INSTR_PROF_RAW_VERSION 8 /* Indexed profile format version (start from 1). */ -#define INSTR_PROF_INDEX_VERSION 7 +#define INSTR_PROF_INDEX_VERSION 8 /* Coverage mapping format version (start from 0). */ #define INSTR_PROF_COVMAP_VERSION 5 @@ -662,6 +662,7 @@ * The 59th bit indicates whether to use debug info to correlate profiles. * The 60th bit indicates single byte coverage instrumentation. * The 61st bit indicates function entry instrumentation only. + * The 62nd bit indicates whether memory profile information is present. 
*/ #define VARIANT_MASKS_ALL 0xff00000000000000ULL #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL) @@ -671,6 +672,7 @@ #define VARIANT_MASK_DBG_CORRELATE (0x1ULL << 59) #define VARIANT_MASK_BYTE_COVERAGE (0x1ULL << 60) #define VARIANT_MASK_FUNCTION_ENTRY_ONLY (0x1ULL << 61) +#define VARIANT_MASK_MEMPROF (0x1ULL << 62) #define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version #define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime #define INSTR_PROF_PROFILE_COUNTER_BIAS_VAR __llvm_profile_counter_bias diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -19,6 +19,7 @@ #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/InstrProfCorrelator.h" +#include "llvm/ProfileData/MemProf.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/LineIterator.h" @@ -489,6 +490,9 @@ using OnDiskHashTableImplV3 = OnDiskIterableChainedHashTable; +using MemProfHashTable = + OnDiskIterableChainedHashTable; + template class InstrProfReaderItaniumRemapper; @@ -592,14 +596,22 @@ std::unique_ptr Summary; /// Context sensitive profile summary data. std::unique_ptr CS_Summary; + /// MemProf profile summary (if available). + std::unique_ptr MemProfSummary; + /// MemProf profile schema (if available). + memprof::MemProfSchema Schema; + /// MemProf profile data on-disk indexed via llvm::md5(FunctionName). + std::unique_ptr MemProfTable; + // Index to the current record in the record array. unsigned RecordIndex; // Read the profile summary. Return a pointer pointing to one byte past the // end of the summary data if it exists or the input \c Cur. - // \c UseCS indicates whether to use the context-sensitive profile summary. + // \c SummaryType specifies which summary pointer to populate after reading. 
const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version, - const unsigned char *Cur, bool UseCS); + const unsigned char *Cur, + const ProfileSummary::Kind SummaryType); public: IndexedInstrProfReader( @@ -645,6 +657,11 @@ Expected getInstrProfRecord(StringRef FuncName, uint64_t FuncHash); + /// Return the memprof records for the function identified by + /// llvm::md5(Name). + Expected> + getMemProfRecord(uint64_t FuncNameHash); + /// Fill Counts with the profile data for the given function name. Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash, std::vector &Counts); diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -17,6 +17,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/MemProf.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" @@ -37,6 +38,11 @@ private: bool Sparse; StringMap FunctionData; + + // A map to hold memprof data per function. The lower 64 bits obtained from + // the md5 hash of the function name are used to index into the map. + memprof::FunctionMemProfMap MemProfData; + // An enum describing the attributes of the profile. InstrProfKind ProfileKind = InstrProfKind::Unknown; // Use raw pointer here for the incomplete type object. @@ -57,6 +63,9 @@ addRecord(std::move(I), 1, Warn); } + void addRecord(const ::llvm::memprof::MemProfRecord &MR, + function_ref Warn); + /// Merge existing function counts from the given writer. void mergeRecordsFromWriter(InstrProfWriter &&IPW, function_ref Warn); @@ -112,6 +121,8 @@ return Error::success(); } + InstrProfKind getProfileKind() const { return ProfileKind; } + // Internal interface for testing purpose only.
void setValueProfDataEndianness(support::endianness Endianness); void setOutputSparse(bool Sparse); diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -5,6 +5,11 @@ #include #include +#include "llvm/ProfileData/ProfileCommon.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/EndianStream.h" + +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ProfileData/MemProfData.inc" #include "llvm/Support/raw_ostream.h" @@ -12,7 +17,7 @@ namespace llvm { namespace memprof { -enum class Meta { +enum class Meta : uint64_t { Start = 0, #define MIBEntryDef(NameTag, Name, Type) NameTag, #include "llvm/ProfileData/MIBEntryDef.inc" @@ -20,10 +25,16 @@ Size }; +using MemProfSchema = llvm::SmallVector(Meta::Size)>; + struct PortableMemInfoBlock { PortableMemInfoBlock() {} PortableMemInfoBlock(const MemInfoBlock &Block) : Info(Block) {} - PortableMemInfoBlock(const llvm::SmallVectorImpl &Schema, char *Ptr) { + PortableMemInfoBlock(const MemProfSchema &Schema, char *Ptr) { + deserialize(Schema, Ptr); + } + + void deserialize(const MemProfSchema &Schema, char *Ptr) { for (const Meta Id : Schema) { switch (Id) { #define MIBEntryDef(NameTag, Name, Type) \ @@ -40,7 +51,7 @@ } } - void serialize(const llvm::SmallVectorImpl &Schema, char *Ptr) { + char *serialize(const MemProfSchema &Schema, char *Ptr) const { for (const Meta Id : Schema) { switch (Id) { #define MIBEntryDef(NameTag, Name, Type) \ @@ -54,6 +65,7 @@ llvm_unreachable("Unknown meta type id, invalid input?"); } } + return Ptr; } void printYAML(raw_ostream &OS) const { @@ -66,27 +78,54 @@ // Define getters for each type which can be called by analyses. 
#define MIBEntryDef(NameTag, Name, Type) \ - Type get##Name() { return Info.Name; } + Type get##Name() const { return Info.Name; } #include "llvm/ProfileData/MIBEntryDef.inc" #undef MIBEntryDef void clear() { Info = MemInfoBlock(); } + static MemProfSchema getSchema() { + MemProfSchema List; +#define MIBEntryDef(NameTag, Name, Type) List.push_back(Meta::Name); +#include "llvm/ProfileData/MIBEntryDef.inc" +#undef MIBEntryDef + return List; + } + + bool operator==(const PortableMemInfoBlock &Other) const { + bool IsSame = true; +#define MIBEntryDef(NameTag, Name, Type) \ + IsSame &= (Other.get##Name() == get##Name()); +#include "llvm/ProfileData/MIBEntryDef.inc" +#undef MIBEntryDef + return IsSame; + } + + bool operator!=(const PortableMemInfoBlock &Other) const { + return !operator==(Other); + } + private: MemInfoBlock Info; }; struct MemProfRecord { - struct Frame { - std::string Function; + PACKED(struct Frame { + uint64_t Function; uint32_t LineOffset; uint32_t Column; bool IsInlineFrame; - Frame(std::string Str, uint32_t Off, uint32_t Col, bool Inline) - : Function(std::move(Str)), LineOffset(Off), Column(Col), - IsInlineFrame(Inline) {} - }; + Frame(uint64_t Hash, uint32_t Off, uint32_t Col, bool Inline) + : Function(Hash), LineOffset(Off), Column(Col), IsInlineFrame(Inline) {} + + bool operator==(const Frame &Other) const { + return Other.Function == Function && Other.LineOffset == LineOffset && + Other.Column == Column && Other.IsInlineFrame == IsInlineFrame; + } + + bool operator!=(const Frame &Other) const { return !operator==(Other); } + }); std::vector CallStack; PortableMemInfoBlock Info; @@ -96,6 +135,12 @@ Info.clear(); } + size_t serializedSize() const { + return sizeof(uint64_t) + // The number of frames to serialize. + sizeof(Frame) * CallStack.size() + // The contents of the frames. + sizeof(PortableMemInfoBlock); // The size of the payload. + } + // Prints out the contents of the memprof record in YAML. 
void print(llvm::raw_ostream &OS) const { OS << " Callstack:\n"; @@ -111,6 +156,160 @@ Info.printYAML(OS); } + + bool operator==(const MemProfRecord &Other) const { + if (Other.Info != Info) + return false; + + if (Other.CallStack.size() != CallStack.size()) + return false; + + for (size_t I = 0; I < Other.CallStack.size(); I++) { + if (Other.CallStack[I] != CallStack[I]) + return false; + } + return true; + } +}; + +// Serializes the memprof records in \p Records to the ostream \p OS based on +// the schema provided in \p Schema. +void serializeRecords(const ArrayRef Records, + const MemProfSchema &Schema, raw_ostream &OS); + +// Deserializes memprof records from the Buffer +SmallVector deserializeRecords(const MemProfSchema &Schema, + const unsigned char *Buffer); + +// Reads a memprof schema from a buffer. All entries in the buffer are +// interpreted as uint64_t. The first entry in the buffer denotes the number of +// ids in the schema. Subsequent entries are integers which map to memprof::Meta +// enum class entries. After successfully reading the schema, the pointer is one +// byte past the schema contents. +Expected readMemProfSchema(const unsigned char *&Buffer); + +using FunctionMemProfMap = + DenseMap>; + +class MemProfSummaryBuilder final : public llvm::ProfileSummaryBuilder { +public: + MemProfSummaryBuilder(std::vector Cutoffs) + : ProfileSummaryBuilder(std::move(Cutoffs)) {} + + void addRecord(const uint64_t FuncHash, + const ArrayRef &Records) { + // TODO: Do something here. + } + + std::unique_ptr getSummary() { + // TODO: Return something meaningful. + return std::make_unique(ProfileSummary::PSK_MemProf, + SummaryEntryVector(), 0, 0, 0, 0, 0, + 0, 0, 0); + } +}; + +/// Trait for lookups into the on-disk hash table for memprof format in the +/// indexed profile. 
+class MemProfRecordLookupTrait { +public: + using data_type = ArrayRef; + using internal_key_type = uint64_t; + using external_key_type = uint64_t; + using hash_value_type = uint64_t; + using offset_type = uint64_t; + + MemProfRecordLookupTrait() = delete; + MemProfRecordLookupTrait(const MemProfSchema &S) : Schema(S) {} + + static bool EqualKey(uint64_t A, uint64_t B) { return A == B; } + static uint64_t GetInternalKey(uint64_t K) { return K; } + static uint64_t GetExternalKey(uint64_t K) { return K; } + + hash_value_type ComputeHash(uint64_t K) { return K; } + + static std::pair + ReadKeyDataLength(const unsigned char *&D) { + using namespace support; + + offset_type KeyLen = endian::readNext(D); + offset_type DataLen = endian::readNext(D); + return std::make_pair(KeyLen, DataLen); + } + + uint64_t ReadKey(const unsigned char *D, offset_type) { + return support::endian::read64le(D); + } + + data_type ReadData(uint64_t K, const unsigned char *D, offset_type N) { + Records = deserializeRecords(Schema, D); + return Records; + } + +private: + // Holds the memprof schema used to deserialize records. + MemProfSchema Schema; + // Holds the records from one function deserialized from the indexed format.
+ llvm::SmallVector Records; +}; + +class MemProfRecordWriterTrait { +public: + using key_type = uint64_t; + using key_type_ref = uint64_t; + + using data_type = ArrayRef; + using data_type_ref = ArrayRef; + + using hash_value_type = uint64_t; + using offset_type = uint64_t; + + MemProfSummaryBuilder *SummaryBuilder = nullptr; + MemProfSchema *Schema = nullptr; + + MemProfRecordWriterTrait() = default; + + static hash_value_type ComputeHash(key_type_ref K) { return K; } + + static std::pair + EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { + using namespace support; + + endian::Writer LE(Out, little); + + offset_type N = sizeof(K); + LE.write(N); + + offset_type M = 0; + + M += sizeof(uint64_t); + for (const auto &Record : V) { + M += Record.serializedSize(); + } + + LE.write(M); + return std::make_pair(N, M); + } + + void EmitKey(raw_ostream &Out, key_type_ref K, offset_type) { + using namespace support; + endian::Writer LE(Out, little); + LE.write(K); + } + + void EmitData(raw_ostream &Out, key_type_ref K, data_type_ref V, + offset_type) { + using namespace support; + + assert(SummaryBuilder != nullptr && + "MemProf summary builder is not initialized!"); + assert(Schema != nullptr && "MemProf schema is not initialized!"); + + endian::Writer LE(Out, little); + + SummaryBuilder->addRecord(K, V); + serializeRecords(V, *Schema, Out); + } }; } // namespace memprof diff --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h --- a/llvm/include/llvm/ProfileData/RawMemProfReader.h +++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h @@ -66,6 +66,9 @@ return Iterator(this); } + // The RawMemProfReader only holds memory profile information. + InstrProfKind getProfileKind() const { return InstrProfKind::MemProf; } + // Constructor for unittests only. 
RawMemProfReader(std::unique_ptr Sym, llvm::SmallVectorImpl &Seg, diff --git a/llvm/lib/ProfileData/CMakeLists.txt b/llvm/lib/ProfileData/CMakeLists.txt --- a/llvm/lib/ProfileData/CMakeLists.txt +++ b/llvm/lib/ProfileData/CMakeLists.txt @@ -4,6 +4,7 @@ InstrProfCorrelator.cpp InstrProfReader.cpp InstrProfWriter.cpp + MemProf.cpp ProfileSummaryBuilder.cpp SampleProf.cpp SampleProfReader.cpp diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -1345,8 +1345,11 @@ return make_error(instrprof_error::unsupported_version); switch (GET_VERSION(FormatVersion)) { - // When a new field is added in the header add a case statement here to - // populate it. + // When a new field is added in the header add a case statement here to + // populate it. + case 8ull: + H.MemProfOffset = read(Buffer, offsetOf(&Header::MemProfOffset)); + LLVM_FALLTHROUGH; default: H.HashType = read(Buffer, offsetOf(&Header::HashType)); H.HashOffset = read(Buffer, offsetOf(&Header::HashOffset)); @@ -1357,9 +1360,11 @@ size_t Header::size() const { switch (GET_VERSION(Version)) { - // When a new field is added to the header add a case statement here to - // compute the size as offset of the new field + size of the new field. This - // relies on the field being added to the end of the list. + // When a new field is added to the header add a case statement here to + // compute the size as offset of the new field + size of the new field. This + // relies on the field being added to the end of the list. 
+ case 8ull: + return offsetOf(&Header::MemProfOffset) + sizeof(Header::MemProfOffset); default: return offsetOf(&Header::HashOffset) + sizeof(Header::HashOffset); } diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -19,7 +19,9 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/ProfileCommon.h" +#include "llvm/ProfileData/RawMemProfReader.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorOr.h" @@ -841,7 +843,8 @@ const unsigned char * IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, - const unsigned char *Cur, bool UseCS) { + const unsigned char *Cur, + const ProfileSummary::Kind SummaryType) { using namespace IndexedInstrProf; using namespace support; @@ -868,18 +871,30 @@ DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount, Ent.NumBlocks); } - std::unique_ptr &Summary = - UseCS ? this->CS_Summary : this->Summary; // initialize InstrProfSummary using the SummaryData from disk. - Summary = std::make_unique( - UseCS ? 
ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr, - DetailedSummary, SummaryData->get(Summary::TotalBlockCount), + auto S = std::make_unique( + SummaryType, DetailedSummary, + SummaryData->get(Summary::TotalBlockCount), SummaryData->get(Summary::MaxBlockCount), SummaryData->get(Summary::MaxInternalBlockCount), SummaryData->get(Summary::MaxFunctionCount), SummaryData->get(Summary::TotalNumBlocks), SummaryData->get(Summary::TotalNumFunctions)); + + switch (SummaryType) { + case ProfileSummary::PSK_Instr: + Summary = std::move(S); + break; + case ProfileSummary::PSK_CSInstr: + CS_Summary = std::move(S); + break; + case ProfileSummary::PSK_MemProf: + MemProfSummary = std::move(S); + break; + default: + llvm_unreachable("Unknown profile summary type!"); + } return Cur + SummarySize; } else { // The older versions do not support a profile summary. This just computes @@ -910,10 +925,13 @@ Cur += Header->size(); Cur = readSummary((IndexedInstrProf::ProfVersion)Header->Version, Cur, - /* UseCS */ false); + ProfileSummary::PSK_Instr); if (Header->Version & VARIANT_MASK_CSIR_PROF) Cur = readSummary((IndexedInstrProf::ProfVersion)Header->Version, Cur, - /* UseCS */ true); + ProfileSummary::PSK_CSInstr); + if (Header->Version & VARIANT_MASK_MEMPROF) + Cur = readSummary((IndexedInstrProf::ProfVersion)Header->Version, Cur, + ProfileSummary::PSK_MemProf); // Read the hash type and start offset. IndexedInstrProf::HashT HashType = static_cast( @@ -923,10 +941,37 @@ uint64_t HashOffset = endian::byte_swap(Header->HashOffset); - // The rest of the file is an on disk hash table. + // The hash table with profile counts comes next. auto IndexPtr = std::make_unique>( Start + HashOffset, Cur, Start, HashType, Header->Version); + // The MemProfOffset field in the header is only valid when the format version + // is 8 or higher (the version that introduced it).
+ if (GET_VERSION(Header->Version) >= 8 && + Header->Version & VARIANT_MASK_MEMPROF) { + uint64_t MemProfOffset = + endian::byte_swap(Header->MemProfOffset); + assert(MemProfSummary && + "MemProf offset is non-zero but no summary info available!"); + + const unsigned char *Ptr = Start + MemProfOffset; + // The value returned from Generator.Emit. + const uint64_t TableOffset = + support::endian::readNext(Ptr); + + // Read the schema. + auto SchemaOr = memprof::readMemProfSchema(Ptr); + if (!SchemaOr) + return SchemaOr.takeError(); + Schema = SchemaOr.get(); + + // Now initialize the table reader with a pointer into data buffer. + MemProfTable.reset(MemProfHashTable::Create( + /*Buckets=*/Start + TableOffset, + /*Payload=*/Ptr, + /*Base=*/Start, memprof::MemProfRecordLookupTrait(Schema))); + } + // Load the remapping table now if requested. if (RemappingBuffer) { Remapper = std::make_unique< @@ -971,6 +1016,20 @@ return error(instrprof_error::hash_mismatch); } +Expected> +IndexedInstrProfReader::getMemProfRecord(uint64_t FuncNameHash) { + // The table is only created for profiles that carry memprof data. + if (!MemProfTable) + return make_error<InstrProfError>(instrprof_error::invalid_prof, + "no memprof data available in profile"); + auto Iter = MemProfTable->find(FuncNameHash); + if (Iter == MemProfTable->end()) + // TODO: Add memprof specific errors.
+ return make_error(instrprof_error::hash_mismatch, + "memprof record not found"); + return *Iter; +} + Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, uint64_t FuncHash, std::vector &Counts) { diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" @@ -249,11 +250,37 @@ Dest.sortValueData(); } +using ::llvm::memprof::MemProfRecord; +void InstrProfWriter::addRecord(const MemProfRecord &MR, + function_ref Warn) { + // Use 0 as a sentinel value since it's highly unlikely that the lower 64-bits + // of a 128 bit md5 hash will be all zeros. + // TODO: Move this Key frame detection to the constructor to avoid having to + // scan all the callstacks again when adding a new record.
+ uint64_t Key = 0; + for (const auto &Frame : MR.CallStack) { + if (!Frame.IsInlineFrame) + Key = Frame.Function; + } + + if (Key == 0) { + Warn(make_error( + instrprof_error::invalid_prof, + "could not determine leaf function for memprof record.")); + } + + MemProfData[Key].push_back(MR); +} + void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW, function_ref Warn) { for (auto &I : IPW.FunctionData) for (auto &Func : I.getValue()) addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn); + + for (auto &I : IPW.MemProfData) + for (const auto &MR : I.second) + addRecord(MR, Warn); } bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) { @@ -289,15 +316,28 @@ OnDiskChainedHashTableGenerator Generator; + auto MemProfWriter = std::make_unique(); + OnDiskChainedHashTableGenerator + MemProfGenerator; + InstrProfSummaryBuilder ISB(ProfileSummaryBuilder::DefaultCutoffs); InfoObj->SummaryBuilder = &ISB; InstrProfSummaryBuilder CSISB(ProfileSummaryBuilder::DefaultCutoffs); InfoObj->CSSummaryBuilder = &CSISB; + memprof::MemProfSummaryBuilder MemProfSB( + ProfileSummaryBuilder::DefaultCutoffs); + MemProfWriter->SummaryBuilder = &MemProfSB; + // Populate the hash table generator. for (const auto &I : FunctionData) if (shouldEncodeData(I.getValue())) Generator.insert(I.getKey(), &I.getValue()); + + for (const auto &I : MemProfData) { + // Insert the key (func hash) and value (vector of memprof records). + MemProfGenerator.insert(I.first, I.second); + } // Write the header. 
IndexedInstrProf::Header Header; Header.Magic = IndexedInstrProf::Magic; @@ -312,16 +352,18 @@ Header.Version |= VARIANT_MASK_BYTE_COVERAGE; if (static_cast(ProfileKind & InstrProfKind::FunctionEntryOnly)) Header.Version |= VARIANT_MASK_FUNCTION_ENTRY_ONLY; + if (static_cast(ProfileKind & InstrProfKind::MemProf)) + Header.Version |= VARIANT_MASK_MEMPROF; Header.Unused = 0; Header.HashType = static_cast(IndexedInstrProf::HashType); Header.HashOffset = 0; + Header.MemProfOffset = 0; int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t); - // Only write out all the fields except 'HashOffset'. We need - // to remember the offset of that field to allow back patching - // later. - for (int I = 0; I < N - 1; I++) + // Only write out all the fields except 'HashOffset' and 'MemProfOffset'. We + // need to remember the offset of these fields to allow back patching later. + for (int I = 0; I < N - 2; I++) OS.write(reinterpret_cast(&Header)[I]); // Save the location of Header.HashOffset field in \c OS. @@ -329,6 +371,13 @@ // Reserve the space for HashOffset field. OS.write(0); + // Save the location of MemProf profile data. This is stored in two parts as + // the schema and as a separate on-disk chained hashtable. + uint64_t MemProfSectionOffset = OS.tell(); + // Reserve the space for MemProf table field to be patched later if this + // profile contains memory profile information. + OS.write(0); + // Reserve space to write profile summary data. uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size(); uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries); @@ -345,9 +394,46 @@ OS.write(0); } + uint64_t MemProfSummaryOffset = 0; + uint64_t MemProfSummarySize = 0; + if (static_cast(ProfileKind & InstrProfKind::MemProf)) { + MemProfSummaryOffset = OS.tell(); + MemProfSummarySize = SummarySize / sizeof(uint64_t); + for (unsigned I = 0; I < MemProfSummarySize; I++) + OS.write(0); + } + // Write the hash table.
uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj); + // Write the MemProf profile data if we have it. This includes a simple schema + // with the format described below followed by the hashtable: + // uint64_t Offset = MemProfGenerator.Emit + // uint64_t Num schema entries + // uint64_t Schema entry 0 + // uint64_t Schema entry 1 + // .... + // uint64_t Schema entry N - 1 + // OnDiskChainedHashTable MemProfFunctionData + uint64_t MemProfSectionStart = 0; + if (static_cast(ProfileKind & InstrProfKind::MemProf)) { + MemProfSectionStart = OS.tell(); + OS.write(0ULL); // Reserve space for the offset. + + auto Schema = memprof::PortableMemInfoBlock::getSchema(); + OS.write(static_cast(Schema.size())); + for (const auto Id : Schema) { + OS.write(static_cast(Id)); + } + MemProfWriter->Schema = &Schema; + + uint64_t Offset = MemProfGenerator.Emit(OS.OS, *MemProfWriter); + PatchItem PatchItems[] = { + {MemProfSectionStart, &Offset, 1}, + }; + OS.patch(PatchItems, 1); + } + // Allocate space for data to be serialized out. std::unique_ptr TheSummary = IndexedInstrProf::allocSummary(SummarySize); @@ -366,15 +452,31 @@ } InfoObj->CSSummaryBuilder = nullptr; + // For MemProf summary. + std::unique_ptr TheMemProfSummary = nullptr; + if (static_cast(ProfileKind & InstrProfKind::MemProf)) { + // TODO: Document the SummarySize/MemProfSummarySize gotcha here. + TheMemProfSummary = IndexedInstrProf::allocSummary(SummarySize); + std::unique_ptr MemProfPS = MemProfSB.getSummary(); + setSummary(TheMemProfSummary.get(), *MemProfPS); + } + MemProfWriter->SummaryBuilder = nullptr; + // Now do the final patch: PatchItem PatchItems[] = { // Patch the Header.HashOffset field. {HashTableStartFieldOffset, &HashTableStart, 1}, + // Patch the Header.MemProfOffset (=0 for profiles without MemProf data). + {MemProfSectionOffset, &MemProfSectionStart, 1}, // Patch the summary data. 
{SummaryOffset, reinterpret_cast(TheSummary.get()), (int)(SummarySize / sizeof(uint64_t))}, {CSSummaryOffset, reinterpret_cast(TheCSSummary.get()), - (int)CSSummarySize}}; + (int)CSSummarySize}, + {MemProfSummaryOffset, + reinterpret_cast(TheMemProfSummary.get()), + (int)MemProfSummarySize}, + }; OS.patch(PatchItems, sizeof(PatchItems) / sizeof(*PatchItems)); diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -0,0 +1,89 @@ +#include "llvm/ProfileData/MemProf.h" +#include "llvm/Support/Endian.h" + +namespace llvm { +namespace memprof { +namespace { +template void writeBytes(T Pod, char *&Buffer) { + *(T *)Buffer = Pod; + Buffer += sizeof(T); +} + +template T readBytes(unsigned char *&Ptr) { + T Value = *reinterpret_cast(Ptr); + Ptr += sizeof(T); + return Value; +} +} // namespace + +void serializeRecords(const ArrayRef Records, + const MemProfSchema &Schema, raw_ostream &OS) { + // We initialize the size of 8b to hold the number of records. + uint64_t SerializedSize = sizeof(uint64_t); + for (const MemProfRecord &MR : Records) { + SerializedSize += MR.serializedSize(); + } + + // TODO: Reduce the number of allocations here by reusing the same memory + // across functions by caching as global var and memset to zero. 
+ char *Buffer = (char *)calloc(SerializedSize, sizeof(char)); + char *Ptr = Buffer; + writeBytes(static_cast(Records.size()), Ptr); + for (const MemProfRecord &MR : Records) { + writeBytes(static_cast(MR.CallStack.size()), Ptr); + for (const MemProfRecord::Frame &F : MR.CallStack) { + writeBytes(F, Ptr); + } + Ptr = MR.Info.serialize(Schema, Ptr); + } + + OS.write(Buffer, SerializedSize); + free(Buffer); +} + +SmallVector deserializeRecords(const MemProfSchema &Schema, + const unsigned char *Buffer) { + SmallVector Records; + unsigned char *Ptr = const_cast(Buffer); + const uint64_t NumRecords = readBytes(Ptr); + for (uint64_t I = 0; I < NumRecords; I++) { + MemProfRecord MR; + const uint64_t NumFrames = readBytes(Ptr); + for (uint64_t J = 0; J < NumFrames; J++) { + const auto F = readBytes(Ptr); + MR.CallStack.push_back(F); + } + MR.Info.deserialize(Schema, reinterpret_cast(Ptr)); + Ptr += sizeof(PortableMemInfoBlock); + Records.push_back(MR); + } + return Records; +} + +Expected readMemProfSchema(const unsigned char *&Buffer) { + using namespace support; + + const unsigned char *Ptr = Buffer; + const uint64_t NumSchemaIds = + endian::readNext(Ptr); + if (NumSchemaIds > static_cast(Meta::Size)) { + return make_error(instrprof_error::malformed, + "memprof schema invalid"); + } + + MemProfSchema Result; + for (size_t I = 0; I < NumSchemaIds; I++) { + const uint64_t Tag = endian::readNext(Ptr); + if (Tag >= static_cast(Meta::Size)) { + return make_error(instrprof_error::malformed, + "memprof schema invalid"); + } + Result.push_back(static_cast(Tag)); + } + // Advance the buffer to one past the schema if we succeeded.
+ Buffer = Ptr; + return Result; +} + +} // namespace memprof +} // namespace llvm diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp --- a/llvm/lib/ProfileData/RawMemProfReader.cpp +++ b/llvm/lib/ProfileData/RawMemProfReader.cpp @@ -344,7 +344,7 @@ for (size_t I = 0; I < DI.getNumberOfFrames(); I++) { const auto &Frame = DI.getFrame(I); Record.CallStack.emplace_back( - std::to_string(llvm::MD5Hash(trimSuffix(Frame.FunctionName))), + llvm::MD5Hash(trimSuffix(Frame.FunctionName)), Frame.Line - Frame.StartLine, Frame.Column, // Only the first entry is not an inlined location. I != 0); diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -235,10 +235,12 @@ } } +using ::llvm::memprof::MemProfRecord; +using ::llvm::memprof::RawMemProfReader; /// Load an input into a writer context. static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, const InstrProfCorrelator *Correlator, - WriterContext *WC) { + const StringRef ProfiledBinary, WriterContext *WC) { std::unique_lock CtxGuard{WC->Lock}; // Copy the filename, because llvm::ThreadPool copied the input "const @@ -246,6 +248,32 @@ // invalid outside of this packaged task. std::string Filename = Input.Filename; + if (RawMemProfReader::hasFormat(Input.Filename)) { + auto ReaderOrErr = RawMemProfReader::create(Input.Filename, ProfiledBinary); + if (!ReaderOrErr) { + exitWithError(ReaderOrErr.takeError(), Input.Filename); + } + std::unique_ptr Reader = std::move(ReaderOrErr.get()); + // Check if the profile types can be merged. 
+ if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) { + consumeError(std::move(E)); + WC->Errors.emplace_back( + make_error( + "Cannot merge MEM profile with Clang generated profile.", + std::error_code()), + Filename); + return; + } + + // Add the records to the writer; warnings are recorded against this input. + for (const MemProfRecord &MR : *Reader) { + WC->Writer.addRecord(MR, [&](Error E) { + WC->Errors.emplace_back(std::move(E), Filename); + }); + } + return; + } + auto ReaderOrErr = InstrProfReader::create(Input.Filename, Correlator); if (Error E = ReaderOrErr.takeError()) { // Skip the empty profiles by returning sliently. @@ -285,6 +313,7 @@ FuncName, firstTime); }); } + if (Reader->hasError()) if (Error E = Reader->getError()) WC->Errors.emplace_back(std::move(E), Filename); @@ -331,7 +360,8 @@ SymbolRemapper *Remapper, StringRef OutputFilename, ProfileFormat OutputFormat, bool OutputSparse, - unsigned NumThreads, FailureMode FailMode) { + unsigned NumThreads, FailureMode FailMode, + const StringRef ProfiledBinary) { if (OutputFormat != PF_Binary && OutputFormat != PF_Compact_Binary && OutputFormat != PF_Ext_Binary && OutputFormat != PF_Text) exitWithError("unknown format is specified"); @@ -364,14 +394,15 @@ if (NumThreads == 1) { for (const auto &Input : Inputs) - loadInput(Input, Remapper, Correlator.get(), Contexts[0].get()); + loadInput(Input, Remapper, Correlator.get(), ProfiledBinary, + Contexts[0].get()); } else { ThreadPool Pool(hardware_concurrency(NumThreads)); // Load the inputs in parallel (N/NumThreads serial steps).
unsigned Ctx = 0; for (const auto &Input : Inputs) { - Pool.async(loadInput, Input, Remapper, Correlator.get(), + Pool.async(loadInput, Input, Remapper, Correlator.get(), ProfiledBinary, Contexts[Ctx].get()); Ctx = (Ctx + 1) % NumThreads; } @@ -588,7 +619,7 @@ SmallSet WriterErrorCodes; auto WC = std::make_unique(OutputSparse, ErrorLock, WriterErrorCodes); - loadInput(Inputs[0], nullptr, nullptr, WC.get()); + loadInput(Inputs[0], nullptr, nullptr, /*ProfiledBinary=*/"", WC.get()); if (WC->Errors.size() > 0) exitWithError(std::move(WC->Errors[0].first), InstrFilename); @@ -968,6 +999,9 @@ cl::opt DebugInfoFilename( "debug-info", cl::init(""), cl::desc("Use the provided debug info to correlate the raw profile.")); + cl::opt ProfiledBinary( + "profiled-binary", cl::init(""), + cl::desc("Path to binary from which the profile was collected.")); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); @@ -1010,7 +1044,7 @@ if (ProfileKind == instr) mergeInstrProfile(WeightedInputs, DebugInfoFilename, Remapper.get(), OutputFilename, OutputFormat, OutputSparse, NumThreads, - FailureMode); + FailureMode, ProfiledBinary); else mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename, OutputFormat, ProfileSymbolListFile, CompressAllSections, @@ -1041,7 +1075,7 @@ OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n"; exit(0); } - loadInput(WeightedInput, nullptr, nullptr, &Context); + loadInput(WeightedInput, nullptr, nullptr, /*ProfiledBinary=*/"", &Context); overlapInput(BaseFilename, TestFilename, &Context, Overlap, FuncFilter, OS, IsCS); Overlap.dump(OS); diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -12,6 +12,7 @@ #include "llvm/IR/Module.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/InstrProfWriter.h" +#include "llvm/ProfileData/MemProf.h" 
#include "llvm/Support/Compression.h" #include "llvm/Testing/Support/Error.h" #include "llvm/Testing/Support/SupportHelpers.h" @@ -221,6 +222,67 @@ ASSERT_EQ(0U, R->Counts[1]); } +TEST_F(InstrProfTest, test_memprof) { + ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf), + Succeeded()); + llvm::memprof::MemProfRecord MR; + MR.CallStack.push_back({0x123, 1, 2, false}); + MR.CallStack.push_back({0x345, 3, 4, true}); + Writer.addRecord(MR, Err); + + auto Profile = Writer.writeBuffer(); + readProfile(std::move(Profile)); + + auto RecordsOr = Reader->getMemProfRecord(0x123); + ASSERT_THAT_ERROR(RecordsOr.takeError(), Succeeded()); + const auto Records = RecordsOr.get(); + ASSERT_EQ(Records.size(), 1U); + EXPECT_EQ(Records[0], MR); +} + +TEST_F(InstrProfTest, test_memprof_merge) { + Writer.addRecord({"func1", 0x1234, {42}}, Err); + + InstrProfWriter Writer2; + ASSERT_THAT_ERROR(Writer2.mergeProfileKind(InstrProfKind::MemProf), + Succeeded()); + + llvm::memprof::MemProfRecord MR; + MR.CallStack.push_back({0x123, 1, 2, false}); + MR.CallStack.push_back({0x345, 3, 4, true}); + Writer2.addRecord(MR, Err); + + ASSERT_THAT_ERROR(Writer.mergeProfileKind(Writer2.getProfileKind()), + Succeeded()); + Writer.mergeRecordsFromWriter(std::move(Writer2), Err); + + auto Profile = Writer.writeBuffer(); + readProfile(std::move(Profile)); + + Expected R = Reader->getInstrProfRecord("func1", 0x1234); + EXPECT_THAT_ERROR(R.takeError(), Succeeded()); + ASSERT_EQ(1U, R->Counts.size()); + ASSERT_EQ(42U, R->Counts[0]); + + auto RecordsOr = Reader->getMemProfRecord(0x123); + ASSERT_THAT_ERROR(RecordsOr.takeError(), Succeeded()); + const auto Records = RecordsOr.get(); + ASSERT_EQ(Records.size(), 1U); + EXPECT_EQ(Records[0], MR); +} + +TEST_F(InstrProfTest, test_memprof_invalid_add_record) { + llvm::memprof::MemProfRecord MR; + // At least one of the frames should be a non-inline frame. 
+ MR.CallStack.push_back({0x123, 1, 2, true}); + MR.CallStack.push_back({0x345, 3, 4, true}); + + auto CheckErr = [](Error &&E) { + EXPECT_TRUE(ErrorEquals(instrprof_error::invalid_prof, std::move(E))); + }; + Writer.addRecord(MR, CheckErr); +} + static const char callee1[] = "callee1"; static const char callee2[] = "callee2"; static const char callee3[] = "callee3"; diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -24,6 +24,7 @@ using ::llvm::memprof::CallStackMap; using ::llvm::memprof::MemInfoBlock; using ::llvm::memprof::MemProfRecord; +using ::llvm::memprof::MemProfSchema; using ::llvm::memprof::Meta; using ::llvm::memprof::PortableMemInfoBlock; using ::llvm::memprof::RawMemProfReader; @@ -87,7 +88,7 @@ } MATCHER_P4(FrameContains, Function, LineOffset, Column, Inline, "") { - const std::string ExpectedHash = std::to_string(llvm::MD5Hash(Function)); + const uint64_t ExpectedHash = llvm::MD5Hash(Function); if (arg.Function != ExpectedHash) { *result_listener << "Hash mismatch"; return false; @@ -100,6 +101,30 @@ return false; } +MATCHER_P(EqualsRecord, Want, "") { + if (arg == Want) + return true; + + std::string Explanation; + llvm::raw_string_ostream OS(Explanation); + OS << "\n Want: \n"; + Want.print(OS); + OS << "\n Got: \n"; + arg.print(OS); + OS.flush(); + + *result_listener << Explanation; + return false; +} + +MemProfSchema getFullSchema() { + MemProfSchema Schema; +#define MIBEntryDef(NameTag, Name, Type) Schema.push_back(Meta::Name); +#include "llvm/ProfileData/MIBEntryDef.inc" +#undef MIBEntryDef + return Schema; +} + TEST(MemProf, FillsValue) { std::unique_ptr Symbolizer(new MockSymbolizer()); @@ -153,11 +178,7 @@ /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3, /*dealloc_cpu=*/4); - llvm::SmallVector(Meta::Size)> Schema; -#define MIBEntryDef(NameTag, Name, Type) Schema.push_back(Meta::Name); -#include 
"llvm/ProfileData/MIBEntryDef.inc" -#undef MIBEntryDef - + const MemProfSchema Schema = getFullSchema(); PortableMemInfoBlock Block(Schema, reinterpret_cast(&Info)); EXPECT_EQ(Block.getAllocCount(), Info.AllocCount); @@ -165,9 +186,45 @@ EXPECT_EQ(Block.getAllocCpuId(), Info.AllocCpuId); char Output[sizeof(MemInfoBlock)] = {0}; + // TODO: check the returned value of Ptr from the serialize call. Block.serialize(Schema, Output); EXPECT_EQ(*reinterpret_cast(Output), Info); } +TEST(MemProf, RecordSerializationRoundTrip) { + const MemProfSchema Schema = getFullSchema(); + + llvm::SmallVector Records; + MemProfRecord MR; + + MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000, + /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3, + /*dealloc_cpu=*/4); + + MR.Info = PortableMemInfoBlock(Info); + MR.CallStack.push_back({0x123, 1, 2, false}); + MR.CallStack.push_back({0x345, 3, 4, false}); + Records.push_back(MR); + + MR.clear(); + MR.Info = PortableMemInfoBlock(Info); + MR.CallStack.push_back({0x567, 5, 6, false}); + MR.CallStack.push_back({0x789, 7, 8, false}); + Records.push_back(MR); + + std::string Buffer; + llvm::raw_string_ostream OS(Buffer); + serializeRecords(Records, Schema, OS); + OS.flush(); + + const llvm::SmallVector GotRecords = deserializeRecords( + Schema, reinterpret_cast(Buffer.data())); + + ASSERT_TRUE(!GotRecords.empty()); + EXPECT_EQ(GotRecords.size(), Records.size()); + EXPECT_THAT(GotRecords[0], EqualsRecord(Records[0])); + EXPECT_THAT(GotRecords[1], EqualsRecord(Records[1])); +} + } // namespace