Index: include/llvm/ProfileData/InstrProf.h =================================================================== --- include/llvm/ProfileData/InstrProf.h +++ include/llvm/ProfileData/InstrProf.h @@ -576,6 +576,10 @@ serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record, ValueProfData *Dst); +namespace IndexedInstrProf { +struct Summary; +} + ///// Profile summary computation //// // The 'show' command displays richer summary of the profile data. The profile // summary is one or more (Cutoff, MinBlockCount, NumBlocks) triplets. Given a @@ -585,6 +589,10 @@ uint32_t Cutoff; ///< The required percentile of total execution count. uint64_t MinBlockCount; ///< The minimum execution count for this percentile. uint64_t NumBlocks; ///< Number of blocks >= the minumum execution count. + ProfileSummaryEntry(uint32_t TheCutoff, uint64_t TheMinBlockCount, + uint64_t TheNumBlocks) + : Cutoff(TheCutoff), MinBlockCount(TheMinBlockCount), + NumBlocks(TheNumBlocks) {} }; class ProfileSummary { @@ -598,15 +606,17 @@ uint64_t MaxBlockCount, MaxInternalBlockCount, MaxFunctionCount; uint32_t NumBlocks, NumFunctions; inline void addCount(uint64_t Count, bool IsEntry); - void computeDetailedSummary(); public: static const int Scale = 1000000; ProfileSummary(std::vector Cutoffs) : DetailedSummaryCutoffs(Cutoffs), TotalCount(0), MaxBlockCount(0), - MaxInternalBlockCount(0), MaxFunctionCount(0), NumBlocks(0), NumFunctions(0) {} + MaxInternalBlockCount(0), MaxFunctionCount(0), NumBlocks(0), + NumFunctions(0) {} + ProfileSummary(const IndexedInstrProf::Summary &S); inline void addRecord(const InstrProfRecord &); inline std::vector &getDetailedSummary(); + void computeDetailedSummary(); uint32_t getNumBlocks() { return NumBlocks; } uint64_t getTotalCount() { return TotalCount; } uint32_t getNumFunctions() { return NumFunctions; } @@ -684,7 +694,10 @@ // Version 3 supports value profile data. The value profile data is expected // to follow the block counter profile data. 
Version3 = 3, - // The current version is 3. + // In this version, profile summary data \c IndexedInstrProf::Summary is + // stored after the profile header. + Version4 = 4, + // The current version is 4. CurrentVersion = INSTR_PROF_INDEX_VERSION }; const uint64_t Version = ProfVersion::CurrentVersion; @@ -698,11 +711,59 @@ struct Header { uint64_t Magic; uint64_t Version; - uint64_t MaxFunctionCount; + uint64_t Unused; // Becomes unused since version 4 uint64_t HashType; uint64_t HashOffset; }; +// Detailed-summary cutoffs in parts per million, strictly increasing. +static const uint32_t SummaryCutoffs[] = { + 10000, /* 1% */ + 100000, /* 10% */ + 200000, 300000, 400000, 500000, 600000, 700000, + 800000, 900000, 950000, 990000, 999000, 999900, 999990, 999999}; +static const uint32_t NumSummaryCutoffs = + sizeof(SummaryCutoffs) / sizeof(*SummaryCutoffs); +// Profile summary data recorded in the profile data file in indexed +// format. It is introduced in version 4. The summary data follows +// right after the profile file header. +struct Summary { + struct Entry { + uint64_t Cutoff; ///< The required percentile of total execution count. + uint64_t + MinBlockCount; ///< The minimum execution count for this percentile. + uint64_t NumBlocks; ///< Number of blocks >= the minimum execution count. + }; + + /// The maximal execution count among all functions. + /// This field does not exist for profile data from IR based instrumentation. + uint64_t MaxFunctionCount; + /// Max block count of the program. + uint64_t MaxBlockCount; + /// Max internal block count of the program (excluding entry blocks). + uint64_t MaxInternalBlockCount; + /// Total number of instrumented blocks/edges. + uint64_t TotalNumBlocks; + /// The sum of all instrumented block counts. + uint64_t TotalBlockCount; + /// The total number of functions instrumented. 
+ uint64_t TotalNumFunctions; + // Reserved fields for future use + uint64_t Reserved[5]; + /// Number of Cutoff entries + uint64_t NumEntries; + Entry Entries[1]; + static inline uint32_t getSize(uint32_t NumEntries) { + return sizeof(Summary) + (NumEntries - 1) * sizeof(Entry); + } + Summary(uint32_t Size) { memset(this, 0, Size); } + Summary() = delete; +}; + +inline std::unique_ptr allocSummary(uint32_t TotalSize) { + return std::unique_ptr(new (::operator new(TotalSize)) + Summary(TotalSize)); +} } // end namespace IndexedInstrProf namespace RawInstrProf { Index: include/llvm/ProfileData/InstrProfData.inc =================================================================== --- include/llvm/ProfileData/InstrProfData.inc +++ include/llvm/ProfileData/InstrProfData.inc @@ -697,7 +697,7 @@ /* Raw profile format version. */ #define INSTR_PROF_RAW_VERSION 2 -#define INSTR_PROF_INDEX_VERSION 3 +#define INSTR_PROF_INDEX_VERSION 4 #define INSTR_PROF_COVMAP_VERSION 0 /* Profile version is always of type uint_64_t. Reserve the upper 8 bits in the Index: include/llvm/ProfileData/InstrProfReader.h =================================================================== --- include/llvm/ProfileData/InstrProfReader.h +++ include/llvm/ProfileData/InstrProfReader.h @@ -336,12 +336,17 @@ std::unique_ptr DataBuffer; /// The index into the profile data. std::unique_ptr Index; - /// The maximal execution count among all functions. - uint64_t MaxFunctionCount; + /// Profile summary data. + std::unique_ptr Summary; IndexedInstrProfReader(const IndexedInstrProfReader &) = delete; IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete; + // Read the profile summary. Return a pointer pointing to one byte past the + // end of the summary data if it exists or the input \c Cur. 
+ const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version, + const unsigned char *Cur); + public: uint64_t getVersion() const { return Index->getVersion(); } IndexedInstrProfReader(std::unique_ptr DataBuffer) @@ -365,7 +370,7 @@ std::vector &Counts); /// Return the maximum of all known function counts. - uint64_t getMaximumFunctionCount() { return MaxFunctionCount; } + uint64_t getMaximumFunctionCount() { return Summary->getMaxFunctionCount(); } /// Factory method to create an indexed reader. static ErrorOr> @@ -383,6 +388,7 @@ // to be used by llvm-profdata (for dumping). Avoid using this when // the client is the compiler. InstrProfSymtab &getSymtab() override; + ProfileSummary &getSummary() { return *(Summary.get()); } }; } // end namespace llvm Index: include/llvm/ProfileData/InstrProfWriter.h =================================================================== --- include/llvm/ProfileData/InstrProfWriter.h +++ include/llvm/ProfileData/InstrProfWriter.h @@ -33,7 +33,6 @@ private: StringMap FunctionData; - uint64_t MaxFunctionCount; // Use raw pointer here for the incomplete type object. 
InstrProfRecordWriterTrait *InfoObj; Index: lib/ProfileData/InstrProf.cpp =================================================================== --- lib/ProfileData/InstrProf.cpp +++ lib/ProfileData/InstrProf.cpp @@ -641,4 +641,15 @@ return; } +ProfileSummary::ProfileSummary(const IndexedInstrProf::Summary &S) + : TotalCount(S.TotalBlockCount), MaxBlockCount(S.MaxBlockCount), + MaxInternalBlockCount(S.MaxInternalBlockCount), + MaxFunctionCount(S.MaxFunctionCount), NumBlocks(S.TotalNumBlocks), + NumFunctions(S.TotalNumFunctions) { + for (unsigned I = 0; I < S.NumEntries; I++) { + const IndexedInstrProf::Summary::Entry &Ent = S.Entries[I]; + DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount, + Ent.NumBlocks); + } +} } Index: lib/ProfileData/InstrProfReader.cpp =================================================================== --- lib/ProfileData/InstrProfReader.cpp +++ lib/ProfileData/InstrProfReader.cpp @@ -554,6 +554,38 @@ return Magic == IndexedInstrProf::Magic; } +const unsigned char * +IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, + const unsigned char *Cur) { + using namespace support; + if (Version >= IndexedInstrProf::Version4) { + const IndexedInstrProf::Summary *SummaryInLE = + reinterpret_cast(Cur); + uint64_t NEntries = + endian::byte_swap(SummaryInLE->NumEntries); + uint32_t SummarySize = IndexedInstrProf::Summary::getSize(NEntries); + std::unique_ptr SummaryData = + IndexedInstrProf::allocSummary(SummarySize); + + const uint64_t *Src = reinterpret_cast(SummaryInLE); + uint64_t *Dst = reinterpret_cast(SummaryData.get()); + for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++) + Dst[I] = endian::byte_swap(Src[I]); + + // initialize ProfileSummary using the SummaryData from disk. 
+ this->Summary = llvm::make_unique(*(SummaryData.get())); + return Cur + SummarySize; + } else { + // For older version of profile data, we need to compute on the fly. NOTE(review): no records are added before computeDetailedSummary() here, and the header's max function count is no longer read, so this summary (and getMaximumFunctionCount()) looks empty/zero for pre-version-4 profiles -- confirm. + using namespace IndexedInstrProf; + std::vector Cutoffs(&SummaryCutoffs[0], + &SummaryCutoffs[NumSummaryCutoffs]); + this->Summary = llvm::make_unique(Cutoffs); + this->Summary->computeDetailedSummary(); + return Cur; + } +} + std::error_code IndexedInstrProfReader::readHeader() { const unsigned char *Start = (const unsigned char *)DataBuffer->getBufferStart(); @@ -576,9 +608,7 @@ if (FormatVersion > IndexedInstrProf::ProfVersion::CurrentVersion) return error(instrprof_error::unsupported_version); - // Read the maximal function count. - MaxFunctionCount = - endian::byte_swap(Header->MaxFunctionCount); + Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur); // Read the hash type and start offset. IndexedInstrProf::HashT HashType = static_cast( Index: lib/ProfileData/InstrProfWriter.cpp =================================================================== --- lib/ProfileData/InstrProfWriter.cpp +++ lib/ProfileData/InstrProfWriter.cpp @@ -84,6 +84,7 @@ typedef uint64_t offset_type; support::endianness ValueProfDataEndianness; + ProfileSummary *TheProfileSummary; InstrProfRecordWriterTrait() : ValueProfDataEndianness(support::little) {} static hash_value_type ComputeHash(key_type_ref K) { @@ -122,6 +123,7 @@ endian::Writer LE(Out); for (const auto &ProfileData : *V) { const InstrProfRecord &ProfRecord = ProfileData.second; + TheProfileSummary->addRecord(ProfRecord); LE.write(ProfileData.first); // Function hash LE.write(ProfRecord.Counts.size()); @@ -140,8 +142,7 @@ } InstrProfWriter::InstrProfWriter() - : FunctionData(), MaxFunctionCount(0), - InfoObj(new InstrProfRecordWriterTrait()) {} + : FunctionData(), InfoObj(new InstrProfRecordWriterTrait()) {} InstrProfWriter::~InstrProfWriter() { delete InfoObj; } @@ -176,16 +177,35 @@ Dest.sortValueData(); - // We keep track of the max 
function count as we go for simplicity. - // Update this statistic no matter the result of the merge. - if (Dest.Counts[0] > MaxFunctionCount) - MaxFunctionCount = Dest.Counts[0]; - return Result; } +static void setSummary(IndexedInstrProf::Summary *TheSummary, + ProfileSummary &PS) { + std::vector &Res = PS.getDetailedSummary(); + TheSummary->NumEntries = Res.size(); + TheSummary->MaxFunctionCount = PS.getMaxFunctionCount(); + TheSummary->MaxBlockCount = PS.getMaxBlockCount(); + TheSummary->MaxInternalBlockCount = PS.getMaxInternalBlockCount(); + TheSummary->TotalBlockCount = PS.getTotalCount(); + TheSummary->TotalNumBlocks = PS.getNumBlocks(); + TheSummary->TotalNumFunctions = PS.getNumFunctions(); + for (unsigned I = 0; I < Res.size(); I++) { + TheSummary->Entries[I].Cutoff = Res[I].Cutoff; + TheSummary->Entries[I].MinBlockCount = Res[I].MinBlockCount; + TheSummary->Entries[I].NumBlocks = Res[I].NumBlocks; + } +} + void InstrProfWriter::writeImpl(ProfOStream &OS) { OnDiskChainedHashTableGenerator Generator; + + using namespace IndexedInstrProf; + std::vector Cutoffs(&SummaryCutoffs[0], + &SummaryCutoffs[NumSummaryCutoffs]); + ProfileSummary PS(Cutoffs); + InfoObj->TheProfileSummary = &PS; + // Populate the hash table generator. for (const auto &I : FunctionData) Generator.insert(I.getKey(), &I.getValue()); @@ -193,7 +213,7 @@ IndexedInstrProf::Header Header; Header.Magic = IndexedInstrProf::Magic; Header.Version = IndexedInstrProf::ProfVersion::CurrentVersion; - Header.MaxFunctionCount = MaxFunctionCount; + Header.Unused = 0; Header.HashType = static_cast(IndexedInstrProf::HashType); Header.HashOffset = 0; int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t); @@ -204,15 +224,37 @@ for (int I = 0; I < N - 1; I++) OS.write(reinterpret_cast(&Header)[I]); - // Save a space to write the hash table start location. - uint64_t HashTableStartLoc = OS.tell(); + // Save the location of Header.HashOffset field in \c OS. 
+ uint64_t HashTableStartFieldOffset = OS.tell(); // Reserve the space for HashOffset field. OS.write(0); + + // Reserve space to write profile summary data. + uint32_t NumEntries = Cutoffs.size(); + uint32_t SummarySize = IndexedInstrProf::Summary::getSize(NumEntries); + // Remember the summary offset. + uint64_t SummaryOffset = OS.tell(); + for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++) + OS.write(0); + // Write the hash table. uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj); + // Allocate space for data to be serialized out. + std::unique_ptr TheSummary = + IndexedInstrProf::allocSummary(SummarySize); + // Compute the Summary and copy the data to the data + // structure to be serialized out (to disk or buffer). + setSummary(TheSummary.get(), PS); + InfoObj->TheProfileSummary = nullptr; // PS is a local; do not leave it dangling. + // Now do the final patch: - PatchItem PatchItems[1] = {{HashTableStartLoc, &HashTableStart, 1}}; + PatchItem PatchItems[] = { + // Patch the Header.HashOffset field. + {HashTableStartFieldOffset, &HashTableStart, 1}, + // Patch the summary data. 
+ {SummaryOffset, reinterpret_cast(TheSummary.get()), + (int)(SummarySize / sizeof(uint64_t))}}; OS.patch(PatchItems, sizeof(PatchItems) / sizeof(*PatchItems)); } Index: unittests/ProfileData/InstrProfTest.cpp =================================================================== --- unittests/ProfileData/InstrProfTest.cpp +++ unittests/ProfileData/InstrProfTest.cpp @@ -125,6 +125,45 @@ ASSERT_TRUE(ErrorEquals(instrprof_error::unknown_function, EC)); } +// Profile data is copied from general.proftext +TEST_F(InstrProfTest, get_profile_summary) { + InstrProfRecord Record1("func1", 0x1234, {97531}); + InstrProfRecord Record2("func2", 0x1234, {0, 0}); + InstrProfRecord Record3("func3", 0x1234, + {2305843009213693952, 1152921504606846976, + 576460752303423488, 288230376151711744, + 144115188075855872, 72057594037927936}); + InstrProfRecord Record4("func4", 0x1234, {0}); + Writer.addRecord(std::move(Record1)); + Writer.addRecord(std::move(Record2)); + Writer.addRecord(std::move(Record3)); + Writer.addRecord(std::move(Record4)); + auto Profile = Writer.writeBuffer(); + readProfile(std::move(Profile)); + + ProfileSummary &PS = Reader->getSummary(); + ASSERT_EQ(2305843009213693952U, PS.getMaxFunctionCount()); + ASSERT_EQ(2305843009213693952U, PS.getMaxBlockCount()); + ASSERT_EQ(10U, PS.getNumBlocks()); + ASSERT_EQ(4539628424389557499U, PS.getTotalCount()); + std::vector &Details = PS.getDetailedSummary(); + uint32_t Cutoff = 800000; + auto Predicate = [&Cutoff](const ProfileSummaryEntry &PE) { + return PE.Cutoff == Cutoff; + }; + auto EightyPerc = std::find_if(Details.begin(), Details.end(), Predicate); + Cutoff = 900000; + auto NinetyPerc = std::find_if(Details.begin(), Details.end(), Predicate); + Cutoff = 950000; + auto NinetyFivePerc = std::find_if(Details.begin(), Details.end(), Predicate); + Cutoff = 990000; + auto NinetyNinePerc = std::find_if(Details.begin(), Details.end(), Predicate); + ASSERT_EQ(576460752303423488U, EightyPerc->MinBlockCount); + 
ASSERT_EQ(288230376151711744U, NinetyPerc->MinBlockCount); + ASSERT_EQ(288230376151711744U, NinetyFivePerc->MinBlockCount); + ASSERT_EQ(72057594037927936U, NinetyNinePerc->MinBlockCount); +} + TEST_F(InstrProfTest, get_icall_data_read_write) { InstrProfRecord Record1("caller", 0x1234, {1, 2}); InstrProfRecord Record2("callee1", 0x1235, {3, 4});