Index: include/llvm/ProfileData/SampleProf.h =================================================================== --- include/llvm/ProfileData/SampleProf.h +++ include/llvm/ProfileData/SampleProf.h @@ -49,7 +49,8 @@ unsupported_writing_format, truncated_name_table, not_implemented, - counter_overflow + counter_overflow, + ostream_seek_unsupported }; inline std::error_code make_error_code(sampleprof_error E) { Index: include/llvm/ProfileData/SampleProfReader.h =================================================================== --- include/llvm/ProfileData/SampleProfReader.h +++ include/llvm/ProfileData/SampleProfReader.h @@ -364,7 +364,7 @@ : SampleProfileReader(std::move(B), C, Format) {} /// Read and validate the file header. - std::error_code readHeader() override; + virtual std::error_code readHeader() override; /// Read sample profiles from the associated file. std::error_code read() override; @@ -378,6 +378,10 @@ /// \returns the read value. template ErrorOr readNumber(); + /// Read a numeric value of type T from the profile. The value is saved + /// without encoding. + template ErrorOr readUnencodedNumber(); + /// Read a string from the profile. /// /// If an error occurs during decoding, a diagnostic message is emitted and @@ -392,6 +396,9 @@ /// Return true if we've reached the end of file. bool at_eof() const { return Data >= End; } + /// Read the next function profile instance. + std::error_code readFuncProfile(); + /// Read the contents of the given profile instance. std::error_code readProfile(FunctionSamples &FProfile); @@ -436,10 +443,15 @@ private: /// Function name table. std::vector NameTable; + /// The table mapping from function name to the offset of its FunctionSamples + /// from the start of the file. + DenseMap FuncOffsetTable; virtual std::error_code verifySPMagic(uint64_t Magic) override; virtual std::error_code readNameTable() override; /// Read a string indirectly via the name table.
virtual ErrorOr readStringFromTable() override; + virtual std::error_code readHeader() override; + std::error_code readFuncOffsetTable(); public: SampleProfileReaderCompactBinary(std::unique_ptr B, @@ -448,6 +460,9 @@ /// \brief Return true if \p Buffer is in the format supported by this class. static bool hasFormat(const MemoryBuffer &Buffer); + + /// Read samples only for functions in FNamesSet. + std::error_code read(DenseSet &FNamesSet); }; using InlineCallStack = SmallVector; Index: include/llvm/ProfileData/SampleProfWriter.h =================================================================== --- include/llvm/ProfileData/SampleProfWriter.h +++ include/llvm/ProfileData/SampleProfWriter.h @@ -42,7 +42,7 @@ /// Write all the sample profiles in the given map of samples. /// /// \returns status code of the file update operation. - std::error_code write(const StringMap &ProfileMap); + virtual std::error_code write(const StringMap &ProfileMap); raw_ostream &getOutputStream() { return *OutputStream; } @@ -103,14 +103,15 @@ /// Sample-based profile writer (binary format). class SampleProfileWriterBinary : public SampleProfileWriter { public: - std::error_code write(const FunctionSamples &S) override; + virtual std::error_code write(const FunctionSamples &S) override; SampleProfileWriterBinary(std::unique_ptr &OS) : SampleProfileWriter(OS) {} protected: virtual std::error_code writeNameTable() = 0; virtual std::error_code writeMagicIdent() = 0; - std::error_code writeHeader(const StringMap &ProfileMap) override; + virtual std::error_code + writeHeader(const StringMap &ProfileMap) override; std::error_code writeSummary(); std::error_code writeNameIdx(StringRef FName); std::error_code writeBody(const FunctionSamples &S); @@ -135,12 +136,56 @@ virtual std::error_code writeMagicIdent() override; }; +// CompactBinary is a compact format of binary profile which both reduces +// the profile size and the load time needed when compiling.
It has two +// major differences from the Binary format. +// 1. It represents all the strings in name table using md5 hash. +// 2. It saves a function offset table which maps function name index to +// the offset of its function profile from the start of the binary profile, +// so by using the function offset table, for those function profiles which +// will not be needed when compiling a module, the profile reader doesn't +// have to read them and it saves compile time if the profile size is huge. +// The layout of the compact format is shown as follows: +// +// Part1: Profile header, the same as binary format, containing magic +// number, version, summary, name table... +// Part2: Function Offset Table Offset, which saves the position of +// Part4. +// Part3: Function profile collection +// function1 profile start +// .... +// function2 profile start +// .... +// function3 profile start +// .... +// ...... +// Part4: Function Offset Table +// function1 name index --> function1 profile start +// function2 name index --> function2 profile start +// function3 name index --> function3 profile start +// +// We need Part2 because profile reader can use it to find out and read +// function offset table without reading Part3 first. class SampleProfileWriterCompactBinary : public SampleProfileWriterBinary { using SampleProfileWriterBinary::SampleProfileWriterBinary; +public: + virtual std::error_code write(const FunctionSamples &S) override; + virtual std::error_code + write(const StringMap &ProfileMap) override; + protected: + /// The table mapping from function name to the offset of its FunctionSamples + /// from the profile start. + MapVector FuncOffsetTable; + /// The offset of the slot to be filled with the offset of FuncOffsetTable + /// from the profile start.
+ uint64_t TableOffset; virtual std::error_code writeNameTable() override; virtual std::error_code writeMagicIdent() override; + virtual std::error_code + writeHeader(const StringMap &ProfileMap) override; + std::error_code writeFuncOffsetTable(); }; } // end namespace sampleprof Index: lib/ProfileData/SampleProfReader.cpp =================================================================== --- lib/ProfileData/SampleProfReader.cpp +++ lib/ProfileData/SampleProfReader.cpp @@ -30,6 +30,7 @@ #include "llvm/Support/ErrorOr.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/LineIterator.h" +#include "llvm/Support/MD5.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include @@ -320,6 +321,21 @@ } template +ErrorOr SampleProfileReaderBinary::readUnencodedNumber() { + std::error_code EC; + + if (Data + sizeof(T) > End) { + EC = sampleprof_error::truncated; + reportError(0, EC.message()); + return EC; + } + + using namespace support; + T Val = endian::readNext(Data); + return Val; +} + +template inline ErrorOr SampleProfileReaderBinary::readStringIndex(T &Table) { std::error_code EC; auto Idx = readNumber(); @@ -423,29 +439,52 @@ return sampleprof_error::success; } -std::error_code SampleProfileReaderBinary::read() { - while (!at_eof()) { - auto NumHeadSamples = readNumber(); - if (std::error_code EC = NumHeadSamples.getError()) - return EC; +std::error_code SampleProfileReaderBinary::readFuncProfile() { + auto NumHeadSamples = readNumber(); + if (std::error_code EC = NumHeadSamples.getError()) + return EC; - auto FName(readStringFromTable()); - if (std::error_code EC = FName.getError()) - return EC; + auto FName(readStringFromTable()); + if (std::error_code EC = FName.getError()) + return EC; - Profiles[*FName] = FunctionSamples(); - FunctionSamples &FProfile = Profiles[*FName]; - FProfile.setName(*FName); + Profiles[*FName] = FunctionSamples(); + FunctionSamples &FProfile = Profiles[*FName]; + FProfile.setName(*FName); -
FProfile.addHeadSamples(*NumHeadSamples); + FProfile.addHeadSamples(*NumHeadSamples); - if (std::error_code EC = readProfile(FProfile)) + if (std::error_code EC = readProfile(FProfile)) + return EC; + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderBinary::read() { + while (!at_eof()) { + if (std::error_code EC = readFuncProfile()) return EC; } return sampleprof_error::success; } +std::error_code +SampleProfileReaderCompactBinary::read(DenseSet &FNamesSet) { + for (auto Name : FNamesSet) { + auto GUID = std::to_string(MD5Hash(Name)); + auto iter = FuncOffsetTable.find(StringRef(GUID)); + if (iter == FuncOffsetTable.end()) + continue; + const uint8_t *SavedData = Data; + Data = reinterpret_cast(Buffer->getBufferStart()) + + iter->second; + if (std::error_code EC = readFuncProfile()) + return EC; + Data = SavedData; + } + return sampleprof_error::success; +} + std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) { if (Magic == SPMagic()) return sampleprof_error::success; @@ -514,6 +553,48 @@ return sampleprof_error::success; } +std::error_code SampleProfileReaderCompactBinary::readHeader() { + // Stop on a base-header error instead of going on to read the table slot. + if (std::error_code EC = SampleProfileReaderBinary::readHeader()) + return EC; + return readFuncOffsetTable(); +} + +std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() { + auto TableOffset = readUnencodedNumber(); + if (std::error_code EC = TableOffset.getError()) + return EC; + // Reject a table offset that points outside the profile buffer. 
+ if (*TableOffset >= Buffer->getBufferSize()) + return sampleprof_error::truncated; + + const uint8_t *SavedData = Data; + const uint8_t *TableStart = + reinterpret_cast(Buffer->getBufferStart()) + + *TableOffset; + Data = TableStart; + + auto Size = readNumber(); + if (std::error_code EC = Size.getError()) + return EC; + + FuncOffsetTable.reserve(*Size); + for (uint32_t I = 0; I < *Size; ++I) { + auto FName(readStringFromTable()); + if (std::error_code EC = FName.getError()) + return EC; + + auto Offset = readNumber(); + if (std::error_code EC = Offset.getError()) + return EC; + +
FuncOffsetTable[*FName] = *Offset; + } + End = TableStart; + Data = SavedData; + return sampleprof_error::success; +} + std::error_code SampleProfileReaderBinary::readSummaryEntry( std::vector &Entries) { auto Cutoff = readNumber(); Index: lib/ProfileData/SampleProfWriter.cpp =================================================================== --- lib/ProfileData/SampleProfWriter.cpp +++ lib/ProfileData/SampleProfWriter.cpp @@ -22,6 +22,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/SampleProf.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/EndianStream.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/LEB128.h" @@ -64,6 +66,15 @@ return sampleprof_error::success; } +std::error_code SampleProfileWriterCompactBinary::write( + const StringMap &ProfileMap) { + if (std::error_code EC = SampleProfileWriter::write(ProfileMap)) + return EC; + if (std::error_code EC = writeFuncOffsetTable()) + return EC; + return sampleprof_error::success; +} + /// Write samples to a text file. /// /// Note: it may be tempting to implement this in terms of @@ -168,6 +179,31 @@ return sampleprof_error::success; } +std::error_code SampleProfileWriterCompactBinary::writeFuncOffsetTable() { + auto &OS = *OutputStream; + + // Fill the slot remembered by TableOffset with the offset of FuncOffsetTable. + auto &OFS = static_cast(OS); + uint64_t FuncOffsetTableStart = OS.tell(); + if (OFS.seek(TableOffset) == (uint64_t)-1) + return sampleprof_error::ostream_seek_unsupported; + support::endian::Writer Writer(*OutputStream, support::little); + Writer.write(FuncOffsetTableStart); + if (OFS.seek(FuncOffsetTableStart) == (uint64_t)-1) + return sampleprof_error::ostream_seek_unsupported; + + // Write out the table size. + encodeULEB128(FuncOffsetTable.size(), OS); + + // Write out FuncOffsetTable.
+ for (const auto &Entry : FuncOffsetTable) { + if (std::error_code EC = writeNameIdx(Entry.first)) + return EC; + encodeULEB128(Entry.second, OS); + } + return sampleprof_error::success; +} + std::error_code SampleProfileWriterCompactBinary::writeNameTable() { auto &OS = *OutputStream; std::set V; @@ -215,6 +251,19 @@ return sampleprof_error::success; } +std::error_code SampleProfileWriterCompactBinary::writeHeader( + const StringMap &ProfileMap) { + support::endian::Writer Writer(*OutputStream, support::little); + if (auto EC = SampleProfileWriterBinary::writeHeader(ProfileMap)) + return EC; + + // Reserve a slot for the offset of function offset table. The slot will + // be populated with the offset of FuncOffsetTable later. + TableOffset = OutputStream->tell(); + Writer.write(static_cast(-2)); + return sampleprof_error::success; +} + std::error_code SampleProfileWriterBinary::writeSummary() { auto &OS = *OutputStream; encodeULEB128(Summary->getTotalCount(), OS); @@ -283,6 +332,15 @@ return writeBody(S); } +std::error_code +SampleProfileWriterCompactBinary::write(const FunctionSamples &S) { + uint64_t Offset = OutputStream->tell(); + StringRef Name = S.getName(); + FuncOffsetTable[Name] = Offset; + encodeULEB128(S.getHeadSamples(), *OutputStream); + return writeBody(S); +} + /// Create a sample profile file writer based on the specified format. /// /// \param Filename The file to create.
Index: lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- lib/Transforms/IPO/SampleProfile.cpp +++ lib/Transforms/IPO/SampleProfile.cpp @@ -1506,6 +1506,15 @@ INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) +static std::unique_ptr> collectFuncNames(Module &M) { + auto Ptr = make_unique>(); + for (auto &F : M) { + StringRef Fname = F.getName().split('.').first; + Ptr->insert(Fname); + } + return Ptr; +} + bool SampleProfileLoader::doInitialization(Module &M) { auto &Ctx = M.getContext(); auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx); @@ -1514,8 +1523,16 @@ Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); return false; } + std::unique_ptr> FuncNamesSetPtr = collectFuncNames(M); Reader = std::move(ReaderOrErr.get()); - ProfileIsValid = (Reader->read() == sampleprof_error::success); + if (Reader->getFormat() == SPF_Compact_Binary) { + SampleProfileReaderCompactBinary *CompactBinaryReader = + static_cast(&*Reader); + ProfileIsValid = (CompactBinaryReader->read(*FuncNamesSetPtr) == + sampleprof_error::success); + } else { + ProfileIsValid = (Reader->read() == sampleprof_error::success); + } return true; } Index: unittests/ProfileData/SampleProfTest.cpp =================================================================== --- unittests/ProfileData/SampleProfTest.cpp +++ unittests/ProfileData/SampleProfTest.cpp @@ -36,14 +36,18 @@ namespace { struct SampleProfTest : ::testing::Test { - std::string Data; LLVMContext Context; + std::string Profile; + // EC must be declared before OS: the initializer of OS writes to it. + std::error_code EC; std::unique_ptr OS; std::unique_ptr Writer; std::unique_ptr Reader; SampleProfTest() - : Data(), OS(new raw_string_ostream(Data)), Writer(), Reader() {} + : Profile("profile"), + OS(new raw_fd_ostream(Profile, EC, sys::fs::F_None)), Writer(), + Reader() {} void createWriter(SampleProfileFormat Format) { auto WriterOrErr = SampleProfileWriter::create(OS, Format); @@ -51,7
+55,7 @@ Writer = std::move(WriterOrErr.get()); } - void readProfile(std::unique_ptr &Profile) { + void readProfile() { auto ReaderOrErr = SampleProfileReader::create(Profile, Context); ASSERT_TRUE(NoError(ReaderOrErr.getError())); Reader = std::move(ReaderOrErr.get()); @@ -93,8 +97,7 @@ Writer->getOutputStream().flush(); - auto Profile = MemoryBuffer::getMemBufferCopy(Data); - readProfile(Profile); + readProfile(); EC = Reader->read(); ASSERT_TRUE(NoError(EC));