diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc --- a/compiler-rt/include/profile/InstrProfData.inc +++ b/compiler-rt/include/profile/InstrProfData.inc @@ -650,7 +650,7 @@ /* Raw profile format version (start from 1). */ #define INSTR_PROF_RAW_VERSION 8 /* Indexed profile format version (start from 1). */ -#define INSTR_PROF_INDEX_VERSION 8 +#define INSTR_PROF_INDEX_VERSION 9 /* Coverage mapping format version (start from 0). */ #define INSTR_PROF_COVMAP_VERSION 5 diff --git a/compiler-rt/test/profile/Linux/binary-id.c b/compiler-rt/test/profile/Linux/binary-id.c --- a/compiler-rt/test/profile/Linux/binary-id.c +++ b/compiler-rt/test/profile/Linux/binary-id.c @@ -1,13 +1,13 @@ // REQUIRES: linux // RUN: %clang_profgen -Wl,--build-id=none -O2 -o %t %s // RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t -// RUN: llvm-profdata show --binary-ids %t.profraw > %t.out +// RUN: llvm-profdata show --binary-ids %t.profraw > %t.out // RUN: FileCheck %s --check-prefix=NO-BINARY-ID < %t.out // RUN: llvm-profdata merge -o %t.profdata %t.profraw // RUN: %clang_profgen -Wl,--build-id -O2 -o %t %s // RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t -// RUN: llvm-profdata show --binary-ids %t.profraw > %t.profraw.out +// RUN: llvm-profdata show --binary-ids %t.profraw > %t.profraw.out // RUN: FileCheck %s --check-prefix=BINARY-ID-RAW-PROF < %t.profraw.out // RUN: rm -rf %t.profdir @@ -17,6 +17,10 @@ // RUN: llvm-profdata show --binary-ids %t.profdir/default_*.profraw > %t.profraw.out // RUN: FileCheck %s --check-prefix=BINARY-ID-MERGE-PROF < %t.profraw.out +// RUN: llvm-profdata merge -o %t.profdata %t.profraw %t.profraw +// RUN: llvm-profdata show --binary-ids %t.profdata > %t.profdata.out +// RUN: FileCheck %s --check-prefix=BINARY-ID-INDEXED-PROF < %t.profdata.out + void foo() { } @@ -48,3 +52,10 @@ // BINARY-ID-MERGE-PROF-NEXT: Maximum internal block count: 0 // BINARY-ID-MERGE-PROF-NEXT: Binary IDs: // 
BINARY-ID-MERGE-PROF-NEXT: {{[0-9a-f]+}} + +// BINARY-ID-INDEXED-PROF: Instrumentation level: Front-end +// BINARY-ID-INDEXED-PROF-NEXT: Total functions: 3 +// BINARY-ID-INDEXED-PROF-NEXT: Maximum function count: 3 +// BINARY-ID-INDEXED-PROF-NEXT: Maximum internal block count: 0 +// BINARY-ID-INDEXED-PROF-NEXT: Binary IDs: +// BINARY-ID-INDEXED-PROF-NEXT: {{[0-9a-f]+}} diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -1050,7 +1050,9 @@ Version7 = 7, // An additional (optional) memory profile type is added. Version8 = 8, - // The current version is 8. + // Binary ids are added. + Version9 = 9, + // The current version is 9. CurrentVersion = INSTR_PROF_INDEX_VERSION }; const uint64_t Version = ProfVersion::CurrentVersion; @@ -1068,6 +1070,7 @@ uint64_t HashType; uint64_t HashOffset; uint64_t MemProfOffset; + uint64_t BinaryIdOffset; // New fields should only be added at the end to ensure that the size // computation is correct. The methods below need to be updated to ensure that // the new field is read correctly. diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -650,7 +650,7 @@ /* Raw profile format version (start from 1). */ #define INSTR_PROF_RAW_VERSION 8 /* Indexed profile format version (start from 1). */ -#define INSTR_PROF_INDEX_VERSION 8 +#define INSTR_PROF_INDEX_VERSION 9 /* Coverage mapping format version (start from 0). 
*/ #define INSTR_PROF_COVMAP_VERSION 5 diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -17,12 +17,14 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" +#include "llvm/Object/BuildID.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/InstrProfCorrelator.h" #include "llvm/ProfileData/MemProf.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/LineIterator.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/OnDiskHashTable.h" #include "llvm/Support/SwapByteOrder.h" @@ -96,7 +98,12 @@ /// Read a single record. virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0; - /// Print binary ids on stream OS. + /// Read a list of binary ids. + virtual Error readBinaryIds(std::vector &BinaryIds) { + return success(); + } + + /// Print binary ids. virtual Error printBinaryIds(raw_ostream &OS) { return success(); }; /// Iterator over profile data. @@ -295,7 +302,9 @@ uint32_t ValueKindLast; uint32_t CurValueDataSize; - uint64_t BinaryIdsSize; + /// Total size of binary ids. + uint64_t BinaryIdsSize{0}; + /// Start address of binary id length and data pairs. const uint8_t *BinaryIdsStart; public: @@ -310,6 +319,7 @@ static bool hasFormat(const MemoryBuffer &DataBuffer); Error readHeader() override; Error readNextRecord(NamedInstrProfRecord &Record) override; + Error readBinaryIds(std::vector &BinaryIds) override; Error printBinaryIds(raw_ostream &OS) override; uint64_t getVersion() const override { return Version; } @@ -596,6 +606,10 @@ std::unique_ptr MemProfRecordTable; /// MemProf frame profile data on-disk indexed via frame id. std::unique_ptr MemProfFrameTable; + /// Total size of binary ids. 
+ uint64_t BinaryIdsSize{0}; + /// Start address of binary id length and data pairs. + const uint8_t *BinaryIdsStart{nullptr}; // Index to the current record in the record array. unsigned RecordIndex; @@ -706,6 +720,9 @@ return *(Summary.get()); } } + + Error readBinaryIds(std::vector &BinaryIds) override; + Error printBinaryIds(raw_ostream &OS) override; }; } // end namespace llvm diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -18,6 +18,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/Object/BuildID.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/MemProf.h" #include "llvm/Support/Endian.h" @@ -50,6 +51,9 @@ // inline. llvm::MapVector MemProfFrameData; + // List of binary ids. + std::vector BinaryIds; + // An enum describing the attributes of the profile. InstrProfKind ProfileKind = InstrProfKind::Unknown; // Use raw pointer here for the incomplete type object. @@ -79,6 +83,9 @@ bool addMemProfFrame(const memprof::FrameId, const memprof::Frame &F, function_ref Warn); + // Append the given binary ids to the binary ids list. + void addBinaryIds(ArrayRef BIs); + /// Merge existing function counts from the given writer. void mergeRecordsFromWriter(InstrProfWriter &&IPW, function_ref Warn); diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -1372,9 +1372,12 @@ // When a new field is added in the header add a case statement here to // populate it. 
static_assert( - IndexedInstrProf::ProfVersion::CurrentVersion == Version8, + IndexedInstrProf::ProfVersion::CurrentVersion == Version9, "Please update the reading code below if a new field has been added, " "if not add a case statement to fall through to the latest version."); + case 9ull: + H.BinaryIdOffset = read(Buffer, offsetOf(&Header::BinaryIdOffset)); + [[fallthrough]]; case 8ull: H.MemProfOffset = read(Buffer, offsetOf(&Header::MemProfOffset)); [[fallthrough]]; @@ -1391,10 +1394,12 @@ // When a new field is added to the header add a case statement here to // compute the size as offset of the new field + size of the new field. This // relies on the field being added to the end of the list. - static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version8, + static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version9, "Please update the size computation below if a new field has " "been added to the header, if not add a case statement to " "fall through to the latest version."); + case 9ull: + return offsetOf(&Header::BinaryIdOffset) + sizeof(Header::BinaryIdOffset); case 8ull: return offsetOf(&Header::MemProfOffset) + sizeof(Header::MemProfOffset); default: // Version7 (when the backwards compatible header was introduced). 
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -75,6 +75,83 @@ return Reader.readHeader(); } +static Error +readBinaryIdsInternal(const MemoryBuffer &DataBuffer, uint64_t BinaryIdsSize, + const uint8_t *BinaryIdsStart, + std::vector &BinaryIds) { + using namespace support; + + if (BinaryIdsSize == 0) + return Error::success(); + + const uint8_t *BI = BinaryIdsStart; + const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize; + const uint8_t *End = + reinterpret_cast(DataBuffer.getBufferEnd()); + + while (BI < BIEnd) { + size_t Remaining = BIEnd - BI; + // There should be enough left to read the binary id length. + if (Remaining < sizeof(uint64_t)) + return make_error( + instrprof_error::malformed, + "not enough data to read binary id length"); + + // Read binary id length. + uint64_t BILen = endian::readNext(BI); + if (BILen == 0) + return make_error(instrprof_error::malformed, + "binary id length is 0"); + + Remaining = BIEnd - BI; + // There should be enough left to read the binary id data. Compare the raw + // length first: aligning a length near UINT64_MAX wraps around to 0. + if (BILen > Remaining || + Remaining < alignToPowerOf2(BILen, sizeof(uint64_t))) + return make_error( + instrprof_error::malformed, "not enough data to read binary id data"); + + if (BI > End) + return make_error( + instrprof_error::malformed, + "binary id that is read is bigger than buffer size"); + + // Add binary id to the binary ids list. + BinaryIds.push_back(object::BuildID(BI, BI + BILen)); + + // Increment by binary id data length, which is aligned to the size of uint64. 
+ BI += alignToPowerOf2(BILen, sizeof(uint64_t)); + if (BI > End) + return make_error( + instrprof_error::malformed, + "binary id section is greater than buffer size"); + } + + return Error::success(); +} + +static Error printBinaryIdsInternal(raw_ostream &OS, + const MemoryBuffer &DataBuffer, + uint64_t BinaryIdsSize, + const uint8_t *BinaryIdsStart) { + if (BinaryIdsSize == 0) + return Error::success(); + + std::vector BinaryIds; + if (Error E = readBinaryIdsInternal(DataBuffer, BinaryIdsSize, BinaryIdsStart, + BinaryIds)) + return E; + + OS << "Binary IDs: \n"; + for (const auto &BI : BinaryIds) { + for (uint64_t I = 0; I < BI.size(); I++) + OS << format("%02x", BI[I]); + OS << "\n"; + } + + return Error::success(); +} + Expected> InstrProfReader::create(const Twine &Path, const InstrProfCorrelator *Correlator) { @@ -573,54 +648,16 @@ return success(); } -static size_t RoundUp(size_t size, size_t align) { - return (size + align - 1) & ~(align - 1); +template +Error RawInstrProfReader::readBinaryIds( + std::vector &BinaryIds) { + return readBinaryIdsInternal(*DataBuffer, BinaryIdsSize, BinaryIdsStart, + BinaryIds); } template Error RawInstrProfReader::printBinaryIds(raw_ostream &OS) { - if (BinaryIdsSize == 0) - return success(); - - OS << "Binary IDs: \n"; - const uint8_t *BI = BinaryIdsStart; - const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize; - while (BI < BIEnd) { - size_t Remaining = BIEnd - BI; - - // There should be enough left to read the binary ID size field. - if (Remaining < sizeof(uint64_t)) - return make_error( - instrprof_error::malformed, - "not enough data to read binary id length"); - - uint64_t BinaryIdLen = swap(*reinterpret_cast(BI)); - - // There should be enough left to read the binary ID size field, and the - // binary ID. - if (Remaining < sizeof(BinaryIdLen) + BinaryIdLen) - return make_error( - instrprof_error::malformed, "not enough data to read binary id data"); - - // Increment by binary id length data type size. 
- BI += sizeof(BinaryIdLen); - if (BI > (const uint8_t *)DataBuffer->getBufferEnd()) - return make_error( - instrprof_error::malformed, - "binary id that is read is bigger than buffer size"); - - for (uint64_t I = 0; I < BinaryIdLen; I++) - OS << format("%02x", BI[I]); - OS << "\n"; - - // Increment by binary id data length, rounded to the next 8 bytes. This - // accounts for the zero-padding after each build ID. - BI += RoundUp(BinaryIdLen, sizeof(uint64_t)); - if (BI > (const uint8_t *)DataBuffer->getBufferEnd()) - return make_error(instrprof_error::malformed); - } - - return success(); + return printBinaryIdsInternal(OS, *DataBuffer, BinaryIdsSize, BinaryIdsStart); } namespace llvm { @@ -948,9 +985,9 @@ Cur = readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur, /* UseCS */ false); if (Header->formatVersion() & VARIANT_MASK_CSIR_PROF) - Cur = readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur, - /* UseCS */ true); - + Cur = + readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur, + /* UseCS */ true); // Read the hash type and start offset. IndexedInstrProf::HashT HashType = static_cast( endian::byte_swap(Header->HashType)); @@ -963,8 +1000,8 @@ auto IndexPtr = std::make_unique>( Start + HashOffset, Cur, Start, HashType, Header->formatVersion()); - // The MemProfOffset field in the header is only valid when the format version - // is higher than 8 (when it was introduced). + // The MemProfOffset field in the header is only valid when the format + // version is higher than 8 (when it was introduced). if (GET_VERSION(Header->formatVersion()) >= 8 && Header->formatVersion() & VARIANT_MASK_MEMPROF) { uint64_t MemProfOffset = @@ -974,7 +1011,8 @@ // The value returned from RecordTableGenerator.Emit. const uint64_t RecordTableOffset = support::endian::readNext(Ptr); - // The offset in the stream right before invoking FrameTableGenerator.Emit. 
+ // The offset in the stream right before invoking + // FrameTableGenerator.Emit. const uint64_t FramePayloadOffset = support::endian::readNext(Ptr); // The value returned from FrameTableGenerator.Emit. @@ -1000,11 +1038,28 @@ /*Base=*/Start, memprof::FrameLookupTrait())); } + // The BinaryIdOffset field in the header is only valid when the format + // version is 9 or higher (the field was introduced in version 9). + if (GET_VERSION(Header->formatVersion()) >= 9) { + uint64_t BinaryIdOffset = + endian::byte_swap(Header->BinaryIdOffset); + const unsigned char *Ptr = Start + BinaryIdOffset; + // Read binary ids size. + BinaryIdsSize = support::endian::readNext(Ptr); + if (BinaryIdsSize % sizeof(uint64_t)) + return error(instrprof_error::bad_header); + // Set the binary ids start. + BinaryIdsStart = Ptr; + if (BinaryIdsStart > (const unsigned char *)DataBuffer->getBufferEnd()) + return make_error(instrprof_error::malformed, + "corrupted binary ids"); + } + // Load the remapping table now if requested. if (RemappingBuffer) { - Remapper = std::make_unique< - InstrProfReaderItaniumRemapper>( - std::move(RemappingBuffer), *IndexPtr); + Remapper = + std::make_unique>( + std::move(RemappingBuffer), *IndexPtr); if (Error E = Remapper->populateRemappings()) return E; } else { @@ -1136,6 +1191,16 @@ return success(); } +Error IndexedInstrProfReader::readBinaryIds( + std::vector &BinaryIds) { + return readBinaryIdsInternal(*DataBuffer, BinaryIdsSize, BinaryIdsStart, + BinaryIds); +} + +Error IndexedInstrProfReader::printBinaryIds(raw_ostream &OS) { + return printBinaryIdsInternal(OS, *DataBuffer, BinaryIdsSize, BinaryIdsStart); +} + void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) { uint64_t NumFuncs = 0; for (const auto &Func : *this) { diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -54,6 +54,7 @@ uint64_t tell() { return 
OS.tell(); } void write(uint64_t V) { LE.write(V); } + void writeByte(uint8_t V) { LE.write(V); } // \c patch can only be called when all data is written and flushed. // For raw_string_ostream, the patch is done on the target string @@ -280,12 +281,20 @@ return true; } +void InstrProfWriter::addBinaryIds(ArrayRef BIs) { + llvm::append_range(BinaryIds, BIs); +} + void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW, function_ref Warn) { for (auto &I : IPW.FunctionData) for (auto &Func : I.getValue()) addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn); + BinaryIds.reserve(BinaryIds.size() + IPW.BinaryIds.size()); + for (auto &I : IPW.BinaryIds) + addBinaryIds(I); + MemProfFrameData.reserve(IPW.MemProfFrameData.size()); for (auto &I : IPW.MemProfFrameData) { // If we weren't able to add the frame mappings then it doesn't make sense @@ -330,6 +339,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { using namespace IndexedInstrProf; + using namespace support; OnDiskChainedHashTableGenerator Generator; @@ -365,11 +375,13 @@ Header.HashType = static_cast(IndexedInstrProf::HashType); Header.HashOffset = 0; Header.MemProfOffset = 0; + Header.BinaryIdOffset = 0; int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t); - // Only write out all the fields except 'HashOffset' and 'MemProfOffset'. We - // need to remember the offset of these fields to allow back patching later. - for (int I = 0; I < N - 2; I++) + // Only write out all the fields except 'HashOffset', 'MemProfOffset' and + // 'BinaryIdOffset'. We need to remember the offset of these fields to allow + // back patching later. + for (int I = 0; I < N - 3; I++) OS.write(reinterpret_cast(&Header)[I]); // Save the location of Header.HashOffset field in \c OS. @@ -384,6 +396,12 @@ // profile contains memory profile information. OS.write(0); + // Save the location of binary ids section. 
+ uint64_t BinaryIdSectionOffset = OS.tell(); + // Reserve space for the BinaryIdOffset field to be patched later if this + // profile contains binary ids. + OS.write(0); + // Reserve space to write profile summary data. uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size(); uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries); @@ -460,6 +478,43 @@ OS.patch(PatchItems, 3); } + // BinaryIdSection has two parts: + // 1. uint64_t BinaryIdsSectionSize + // 2. list of binary ids that consist of: + // a. uint64_t BinaryIdLength + // b. uint8_t BinaryIdData + // c. uint8_t Padding (if necessary) + uint64_t BinaryIdSectionStart = OS.tell(); + // Calculate size of binary section. + uint64_t BinaryIdsSectionSize = 0; + + // Remove duplicate binary ids. + llvm::sort(BinaryIds); + BinaryIds.erase(std::unique(BinaryIds.begin(), BinaryIds.end()), + BinaryIds.end()); + + for (const auto &BI : BinaryIds) { + // Increment by binary id length data type size. + BinaryIdsSectionSize += sizeof(uint64_t); + // Increment by binary id data length, aligned to 8 bytes. + BinaryIdsSectionSize += alignToPowerOf2(BI.size(), sizeof(uint64_t)); + } + // Write binary ids section size. + OS.write(BinaryIdsSectionSize); + + for (const auto &BI : BinaryIds) { + uint64_t BILen = BI.size(); + // Write binary id length. + OS.write(BILen); + // Write binary id data. + for (uint64_t K = 0; K < BILen; K++) + OS.writeByte(BI[K]); + // Write padding if necessary. + uint64_t PaddingSize = alignToPowerOf2(BILen, sizeof(uint64_t)) - BILen; + for (uint64_t K = 0; K < PaddingSize; K++) + OS.writeByte(0); + } + // Allocate space for data to be serialized out. std::unique_ptr TheSummary = IndexedInstrProf::allocSummary(SummarySize); @@ -482,8 +537,11 @@ PatchItem PatchItems[] = { // Patch the Header.HashOffset field. {HashTableStartFieldOffset, &HashTableStart, 1}, - // Patch the Header.MemProfOffset (=0 for profiles without MemProf data). 
+ // Patch the Header.MemProfOffset (=0 for profiles without MemProf + // data). {MemProfSectionOffset, &MemProfSectionStart, 1}, + // Patch the Header.BinaryIdOffset field. + {BinaryIdSectionOffset, &BinaryIdSectionStart, 1}, // Patch the summary data. {SummaryOffset, reinterpret_cast(TheSummary.get()), (int)(SummarySize / sizeof(uint64_t))}, diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -338,9 +338,16 @@ FuncName, firstTime); }); } - if (Reader->hasError()) + + if (Reader->hasError()) { if (Error E = Reader->getError()) WC->Errors.emplace_back(std::move(E), Filename); + } + + std::vector BinaryIds; + if (Error E = Reader->readBinaryIds(BinaryIds)) + WC->Errors.emplace_back(std::move(E), Filename); + WC->Writer.addBinaryIds(BinaryIds); } /// Merge the \p Src writer context into \p Dst.