Index: llvm/trunk/include/llvm/ProfileData/InstrProf.h =================================================================== --- llvm/trunk/include/llvm/ProfileData/InstrProf.h +++ llvm/trunk/include/llvm/ProfileData/InstrProf.h @@ -576,6 +576,10 @@ serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record, ValueProfData *Dst); +namespace IndexedInstrProf { +struct Summary; +} + ///// Profile summary computation //// // The 'show' command displays richer summary of the profile data. The profile // summary is one or more (Cutoff, MinBlockCount, NumBlocks) triplets. Given a @@ -585,6 +589,10 @@ uint32_t Cutoff; ///< The required percentile of total execution count. uint64_t MinBlockCount; ///< The minimum execution count for this percentile. uint64_t NumBlocks; ///< Number of blocks >= the minumum execution count. + ProfileSummaryEntry(uint32_t TheCutoff, uint64_t TheMinBlockCount, + uint64_t TheNumBlocks) + : Cutoff(TheCutoff), MinBlockCount(TheMinBlockCount), + NumBlocks(TheNumBlocks) {} }; class ProfileSummary { @@ -598,15 +606,17 @@ uint64_t MaxBlockCount, MaxInternalBlockCount, MaxFunctionCount; uint32_t NumBlocks, NumFunctions; inline void addCount(uint64_t Count, bool IsEntry); - void computeDetailedSummary(); public: static const int Scale = 1000000; ProfileSummary(std::vector Cutoffs) : DetailedSummaryCutoffs(Cutoffs), TotalCount(0), MaxBlockCount(0), - MaxInternalBlockCount(0), MaxFunctionCount(0), NumBlocks(0), NumFunctions(0) {} + MaxInternalBlockCount(0), MaxFunctionCount(0), NumBlocks(0), + NumFunctions(0) {} + ProfileSummary(const IndexedInstrProf::Summary &S); inline void addRecord(const InstrProfRecord &); inline std::vector &getDetailedSummary(); + void computeDetailedSummary(); uint32_t getNumBlocks() { return NumBlocks; } uint64_t getTotalCount() { return TotalCount; } uint32_t getNumFunctions() { return NumFunctions; } @@ -684,7 +694,10 @@ // Version 3 supports value profile data. 
The value profile data is expected // to follow the block counter profile data. Version3 = 3, - // The current version is 3. + // In this version, profile summary data \c IndexedInstrProf::Summary is + // stored after the profile header. + Version4 = 4, + // The current version is 4. CurrentVersion = INSTR_PROF_INDEX_VERSION }; const uint64_t Version = ProfVersion::CurrentVersion; @@ -698,11 +711,100 @@ struct Header { uint64_t Magic; uint64_t Version; - uint64_t MaxFunctionCount; + uint64_t Unused; // Becomes unused since version 4 uint64_t HashType; uint64_t HashOffset; }; +static const uint32_t SummaryCutoffs[] = { + 10000, /* 1% */ + 100000, /* 10% */ + 200000, 300000, 400000, 500000, 600000, 700000, + 800000, 900000, 950000, 990000, 999000, 999900, 999990, 999999}; +static const uint32_t NumSummaryCutoffs = + sizeof(SummaryCutoffs) / sizeof(*SummaryCutoffs); + +// Profile summary data recorded in the profile data file in indexed +// format. It is introduced in version 4. The summary data follows +// right after the profile file header. +struct Summary { + + struct Entry { + uint64_t Cutoff; ///< The required percentile of total execution count. + uint64_t + MinBlockCount; ///< The minimum execution count for this percentile. + uint64_t NumBlocks; ///< Number of blocks >= the minimum execution count. + }; + // New field kind to existing enum value mapping should remain unchanged + // when new kind is added in the future. + enum SummaryFieldKind { + /// The total number of functions instrumented. + TotalNumFunctions = 0, + /// Total number of instrumented blocks/edges. + TotalNumBlocks = 1, + /// The maximal execution count among all functions. + /// This field does not exist for profile data from IR based + /// instrumentation. + MaxFunctionCount = 2, + /// Max block count of the program. + MaxBlockCount = 3, + /// Max internal block count of the program (excluding entry blocks).
+ MaxInternalBlockCount = 4, + /// The sum of all instrumented block counts. + TotalBlockCount = 5, + NumKinds = TotalBlockCount + 1 + }; + + // The number of summary fields following the summary header. + uint64_t NumSummaryFields; + // The number of Cutoff Entries (Summary::Entry) following summary fields. + uint64_t NumCutoffEntries; + + static uint32_t getSize(uint32_t NumSumFields, uint32_t NumCutoffEntries) { + return sizeof(Summary) + NumCutoffEntries * sizeof(Entry) + + NumSumFields * sizeof(uint64_t); + } + + const uint64_t *getSummaryDataBase() const { + return reinterpret_cast(this + 1); + } + uint64_t *getSummaryDataBase() { + return reinterpret_cast(this + 1); + } + const Entry *getCutoffEntryBase() const { + return reinterpret_cast( + &getSummaryDataBase()[NumSummaryFields]); + } + Entry *getCutoffEntryBase() { + return reinterpret_cast(&getSummaryDataBase()[NumSummaryFields]); + } + + uint64_t get(SummaryFieldKind K) const { + return getSummaryDataBase()[K]; + } + + void set(SummaryFieldKind K, uint64_t V) { + getSummaryDataBase()[K] = V; + } + + const Entry &getEntry(uint32_t I) const { return getCutoffEntryBase()[I]; } + void setEntry(uint32_t I, const ProfileSummaryEntry &E) { + Entry &ER = getCutoffEntryBase()[I]; + ER.Cutoff = E.Cutoff; + ER.MinBlockCount = E.MinBlockCount; + ER.NumBlocks = E.NumBlocks; + } + + Summary(uint32_t Size) { memset(this, 0, Size); } + void operator delete(void *ptr) { ::operator delete(ptr); } + + Summary() = delete; +}; + +inline std::unique_ptr allocSummary(uint32_t TotalSize) { + return std::unique_ptr(new (::operator new(TotalSize)) + Summary(TotalSize)); +} } // end namespace IndexedInstrProf namespace RawInstrProf { Index: llvm/trunk/include/llvm/ProfileData/InstrProfData.inc =================================================================== --- llvm/trunk/include/llvm/ProfileData/InstrProfData.inc +++ llvm/trunk/include/llvm/ProfileData/InstrProfData.inc @@ -694,7 +694,7 @@ /* Raw profile format version.
*/ #define INSTR_PROF_RAW_VERSION 2 -#define INSTR_PROF_INDEX_VERSION 3 +#define INSTR_PROF_INDEX_VERSION 4 #define INSTR_PROF_COVMAP_VERSION 0 /* Profile version is always of type uint64_t. Reserve the upper 8 bits in the Index: llvm/trunk/include/llvm/ProfileData/InstrProfReader.h =================================================================== --- llvm/trunk/include/llvm/ProfileData/InstrProfReader.h +++ llvm/trunk/include/llvm/ProfileData/InstrProfReader.h @@ -336,12 +336,17 @@ std::unique_ptr DataBuffer; /// The index into the profile data. std::unique_ptr Index; - /// The maximal execution count among all functions. - uint64_t MaxFunctionCount; + /// Profile summary data. + std::unique_ptr Summary; IndexedInstrProfReader(const IndexedInstrProfReader &) = delete; IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete; + // Read the profile summary. Return a pointer pointing to one byte past the + // end of the summary data if it exists or the input \c Cur. + const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version, + const unsigned char *Cur); + public: uint64_t getVersion() const { return Index->getVersion(); } IndexedInstrProfReader(std::unique_ptr DataBuffer) @@ -365,7 +370,7 @@ std::vector &Counts); /// Return the maximum of all known function counts. - uint64_t getMaximumFunctionCount() { return MaxFunctionCount; } + uint64_t getMaximumFunctionCount() { return Summary->getMaxFunctionCount(); } /// Factory method to create an indexed reader. static ErrorOr> @@ -383,6 +388,7 @@ // to be used by llvm-profdata (for dumping). Avoid using this when // the client is the compiler. 
InstrProfSymtab &getSymtab() override; + ProfileSummary &getSummary() { return *(Summary.get()); } }; } // end namespace llvm Index: llvm/trunk/include/llvm/ProfileData/InstrProfWriter.h =================================================================== --- llvm/trunk/include/llvm/ProfileData/InstrProfWriter.h +++ llvm/trunk/include/llvm/ProfileData/InstrProfWriter.h @@ -34,7 +34,6 @@ private: bool Sparse; StringMap FunctionData; - uint64_t MaxFunctionCount; // Use raw pointer here for the incomplete type object. InstrProfRecordWriterTrait *InfoObj; Index: llvm/trunk/lib/ProfileData/InstrProf.cpp =================================================================== --- llvm/trunk/lib/ProfileData/InstrProf.cpp +++ llvm/trunk/lib/ProfileData/InstrProf.cpp @@ -641,4 +641,19 @@ } } +ProfileSummary::ProfileSummary(const IndexedInstrProf::Summary &S) + : TotalCount(S.get(IndexedInstrProf::Summary::TotalBlockCount)), + MaxBlockCount(S.get(IndexedInstrProf::Summary::MaxBlockCount)), + MaxInternalBlockCount( + S.get(IndexedInstrProf::Summary::MaxInternalBlockCount)), + MaxFunctionCount(S.get(IndexedInstrProf::Summary::MaxFunctionCount)), + NumBlocks(S.get(IndexedInstrProf::Summary::TotalNumBlocks)), + NumFunctions(S.get(IndexedInstrProf::Summary::TotalNumFunctions)) { + for (unsigned I = 0; I < S.NumCutoffEntries; I++) { + const IndexedInstrProf::Summary::Entry &Ent = S.getEntry(I); + DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount, + Ent.NumBlocks); + } +} + } // end namespace llvm Index: llvm/trunk/lib/ProfileData/InstrProfReader.cpp =================================================================== --- llvm/trunk/lib/ProfileData/InstrProfReader.cpp +++ llvm/trunk/lib/ProfileData/InstrProfReader.cpp @@ -554,6 +554,41 @@ return Magic == IndexedInstrProf::Magic; } +const unsigned char * +IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, + const unsigned char *Cur) { + using namespace support; + if (Version >= 
IndexedInstrProf::Version4) { + const IndexedInstrProf::Summary *SummaryInLE = + reinterpret_cast(Cur); + uint64_t NFields = + endian::byte_swap(SummaryInLE->NumSummaryFields); + uint64_t NEntries = + endian::byte_swap(SummaryInLE->NumCutoffEntries); + uint32_t SummarySize = + IndexedInstrProf::Summary::getSize(NFields, NEntries); + std::unique_ptr SummaryData = + IndexedInstrProf::allocSummary(SummarySize); + + const uint64_t *Src = reinterpret_cast(SummaryInLE); + uint64_t *Dst = reinterpret_cast(SummaryData.get()); + for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++) + Dst[I] = endian::byte_swap(Src[I]); + + // Initialize ProfileSummary using the SummaryData from disk. + this->Summary = llvm::make_unique(*(SummaryData.get())); + return Cur + SummarySize; + } else { + // FIXME(review): no records are added here, so this summary looks empty. + using namespace IndexedInstrProf; + std::vector Cutoffs(&SummaryCutoffs[0], + &SummaryCutoffs[NumSummaryCutoffs]); + this->Summary = llvm::make_unique(Cutoffs); + this->Summary->computeDetailedSummary(); + return Cur; + } +} + std::error_code IndexedInstrProfReader::readHeader() { const unsigned char *Start = (const unsigned char *)DataBuffer->getBufferStart(); @@ -576,9 +611,7 @@ if (FormatVersion > IndexedInstrProf::ProfVersion::CurrentVersion) return error(instrprof_error::unsupported_version); - // Read the maximal function count. - MaxFunctionCount = - endian::byte_swap(Header->MaxFunctionCount); + Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur); // Read the hash type and start offset.
IndexedInstrProf::HashT HashType = static_cast( Index: llvm/trunk/lib/ProfileData/InstrProfWriter.cpp =================================================================== --- llvm/trunk/lib/ProfileData/InstrProfWriter.cpp +++ llvm/trunk/lib/ProfileData/InstrProfWriter.cpp @@ -84,6 +84,7 @@ typedef uint64_t offset_type; support::endianness ValueProfDataEndianness; + ProfileSummary *TheProfileSummary = nullptr; InstrProfRecordWriterTrait() : ValueProfDataEndianness(support::little) {} static hash_value_type ComputeHash(key_type_ref K) { @@ -122,6 +123,7 @@ endian::Writer LE(Out); for (const auto &ProfileData : *V) { const InstrProfRecord &ProfRecord = ProfileData.second; + TheProfileSummary->addRecord(ProfRecord); LE.write(ProfileData.first); // Function hash LE.write(ProfRecord.Counts.size()); @@ -140,7 +142,7 @@ } InstrProfWriter::InstrProfWriter(bool Sparse) - : Sparse(Sparse), FunctionData(), MaxFunctionCount(0), + : Sparse(Sparse), FunctionData(), InfoObj(new InstrProfRecordWriterTrait()) {} InstrProfWriter::~InstrProfWriter() { delete InfoObj; } @@ -179,11 +181,6 @@ Dest.sortValueData(); - // We keep track of the max function count as we go for simplicity. - // Update this statistic no matter the result of the merge.
- if (Dest.Counts[0] > MaxFunctionCount) - MaxFunctionCount = Dest.Counts[0]; - return Result; } @@ -199,8 +196,32 @@ return false; } +static void setSummary(IndexedInstrProf::Summary *TheSummary, + ProfileSummary &PS) { + using namespace IndexedInstrProf; + std::vector &Res = PS.getDetailedSummary(); + TheSummary->NumSummaryFields = Summary::NumKinds; + TheSummary->NumCutoffEntries = Res.size(); + TheSummary->set(Summary::MaxFunctionCount, PS.getMaxFunctionCount()); + TheSummary->set(Summary::MaxBlockCount, PS.getMaxBlockCount()); + TheSummary->set(Summary::MaxInternalBlockCount, + PS.getMaxInternalBlockCount()); + TheSummary->set(Summary::TotalBlockCount, PS.getTotalCount()); + TheSummary->set(Summary::TotalNumBlocks, PS.getNumBlocks()); + TheSummary->set(Summary::TotalNumFunctions, PS.getNumFunctions()); + for (unsigned I = 0; I < Res.size(); I++) + TheSummary->setEntry(I, Res[I]); +} + void InstrProfWriter::writeImpl(ProfOStream &OS) { OnDiskChainedHashTableGenerator Generator; + + using namespace IndexedInstrProf; + std::vector Cutoffs(&SummaryCutoffs[0], + &SummaryCutoffs[NumSummaryCutoffs]); + ProfileSummary PS(Cutoffs); + InfoObj->TheProfileSummary = &PS; + // Populate the hash table generator. for (const auto &I : FunctionData) if (shouldEncodeData(I.getValue())) @@ -209,7 +230,7 @@ IndexedInstrProf::Header Header; Header.Magic = IndexedInstrProf::Magic; Header.Version = IndexedInstrProf::ProfVersion::CurrentVersion; - Header.MaxFunctionCount = MaxFunctionCount; + Header.Unused = 0; Header.HashType = static_cast(IndexedInstrProf::HashType); Header.HashOffset = 0; int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t); @@ -220,15 +241,37 @@ for (int I = 0; I < N - 1; I++) OS.write(reinterpret_cast(&Header)[I]); - // Save a space to write the hash table start location. - uint64_t HashTableStartLoc = OS.tell(); + // Save the location of Header.HashOffset field in \c OS. 
+ uint64_t HashTableStartFieldOffset = OS.tell(); // Reserve the space for HashOffset field. OS.write(0); + + // Reserve space to write profile summary data. + uint32_t NumEntries = Cutoffs.size(); + uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries); + // Remember the summary offset. + uint64_t SummaryOffset = OS.tell(); + for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++) + OS.write(0); + // Write the hash table. uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj); + // Allocate space for data to be serialized out. + std::unique_ptr TheSummary = + IndexedInstrProf::allocSummary(SummarySize); + // Compute the Summary and copy the data to the data + // structure to be serialized out (to disk or buffer). + setSummary(TheSummary.get(), PS); + InfoObj->TheProfileSummary = nullptr; + + // Now do the final patch: - PatchItem PatchItems[1] = {{HashTableStartLoc, &HashTableStart, 1}}; + PatchItem PatchItems[] = { + // Patch the Header.HashOffset field. + {HashTableStartFieldOffset, &HashTableStart, 1}, + // Patch the summary data.
+ {SummaryOffset, reinterpret_cast(TheSummary.get()), + (int)(SummarySize / sizeof(uint64_t))}}; OS.patch(PatchItems, sizeof(PatchItems) / sizeof(*PatchItems)); } Index: llvm/trunk/unittests/ProfileData/InstrProfTest.cpp =================================================================== --- llvm/trunk/unittests/ProfileData/InstrProfTest.cpp +++ llvm/trunk/unittests/ProfileData/InstrProfTest.cpp @@ -137,6 +137,45 @@ ASSERT_TRUE(ErrorEquals(instrprof_error::unknown_function, EC)); } +// Profile data is copied from general.proftext +TEST_F(InstrProfTest, get_profile_summary) { + InstrProfRecord Record1("func1", 0x1234, {97531}); + InstrProfRecord Record2("func2", 0x1234, {0, 0}); + InstrProfRecord Record3("func3", 0x1234, + {2305843009213693952, 1152921504606846976, + 576460752303423488, 288230376151711744, + 144115188075855872, 72057594037927936}); + InstrProfRecord Record4("func4", 0x1234, {0}); + Writer.addRecord(std::move(Record1)); + Writer.addRecord(std::move(Record2)); + Writer.addRecord(std::move(Record3)); + Writer.addRecord(std::move(Record4)); + auto Profile = Writer.writeBuffer(); + readProfile(std::move(Profile)); + + ProfileSummary &PS = Reader->getSummary(); + ASSERT_EQ(2305843009213693952U, PS.getMaxFunctionCount()); + ASSERT_EQ(2305843009213693952U, PS.getMaxBlockCount()); + ASSERT_EQ(10U, PS.getNumBlocks()); + ASSERT_EQ(4539628424389557499U, PS.getTotalCount()); + std::vector &Details = PS.getDetailedSummary(); + uint32_t Cutoff = 800000; + auto Predicate = [&Cutoff](const ProfileSummaryEntry &PE) { + return PE.Cutoff == Cutoff; + }; + auto EightyPerc = std::find_if(Details.begin(), Details.end(), Predicate); + Cutoff = 900000; + auto NinetyPerc = std::find_if(Details.begin(), Details.end(), Predicate); + Cutoff = 950000; + auto NinetyFivePerc = std::find_if(Details.begin(), Details.end(), Predicate); + Cutoff = 990000; + auto NinetyNinePerc = std::find_if(Details.begin(), Details.end(), Predicate); + ASSERT_EQ(576460752303423488U, 
EightyPerc->MinBlockCount); + ASSERT_EQ(288230376151711744U, NinetyPerc->MinBlockCount); + ASSERT_EQ(288230376151711744U, NinetyFivePerc->MinBlockCount); + ASSERT_EQ(72057594037927936U, NinetyNinePerc->MinBlockCount); +} + TEST_P(MaybeSparseInstrProfTest, get_icall_data_read_write) { InstrProfRecord Record1("caller", 0x1234, {1, 2}); InstrProfRecord Record2("callee1", 0x1235, {3, 4});