Index: llvm/include/llvm/ProfileData/SampleProf.h =================================================================== --- llvm/include/llvm/ProfileData/SampleProf.h +++ llvm/include/llvm/ProfileData/SampleProf.h @@ -165,7 +165,10 @@ // a new check in verifySecFlag. enum class SecNameTableFlags : uint32_t { SecFlagInValid = 0, - SecFlagMD5Name = (1 << 0) + SecFlagMD5Name = (1 << 0), + // Store MD5 in fixed length instead of ULEB128 so NameTable can be + // accessed like an array. + SecFlagFixedLengthMD5 = (1 << 1) }; enum class SecProfSummaryFlags : uint32_t { SecFlagInValid = 0, Index: llvm/include/llvm/ProfileData/SampleProfReader.h =================================================================== --- llvm/include/llvm/ProfileData/SampleProfReader.h +++ llvm/include/llvm/ProfileData/SampleProfReader.h @@ -613,6 +613,7 @@ const SecHdrTableEntry &Entry); // placeholder for subclasses to dispatch their own section readers. virtual std::error_code readCustomSection(const SecHdrTableEntry &Entry) = 0; + virtual ErrorOr readStringFromTable() override; std::unique_ptr ProfSymList; @@ -624,6 +625,12 @@ /// Use all functions from the input profile. bool UseAllFuncs = true; + /// Use fixed length MD5 instead of ULEB128 encoding so NameTable doesn't + /// need to be read in up front and can be directly accessed using index. + bool FixedLengthMD5 = false; + /// The starting address of NameTable containing fixed length MD5. + const uint8_t *MD5NameMemStart = nullptr; + /// If MD5 is used in NameTable section, the section saves uint64_t data. /// The uint64_t data has to be converted to a string and then the string /// will be used to initialize StringRef in NameTable. @@ -651,10 +658,7 @@ void collectFuncsFrom(const Module &M) override; /// Return whether names in the profile are all MD5 numbers. 
- virtual bool useMD5() override { - assert(!NameTable.empty() && "NameTable should have been initialized"); - return MD5StringBuf && !MD5StringBuf->empty(); - } + virtual bool useMD5() override { return MD5StringBuf.get(); } virtual std::unique_ptr getProfileSymbolList() override { return std::move(ProfSymList); Index: llvm/include/llvm/ProfileData/SampleProfWriter.h =================================================================== --- llvm/include/llvm/ProfileData/SampleProfWriter.h +++ llvm/include/llvm/ProfileData/SampleProfWriter.h @@ -158,6 +158,9 @@ virtual void setUseMD5() override { UseMD5 = true; addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagMD5Name); + // MD5 will be stored as plain uint64_t instead of variable-length + // quantity format in NameTable section. + addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagFixedLengthMD5); } // Set the profile to be partial. It means the profile is for Index: llvm/lib/ProfileData/SampleProfReader.cpp =================================================================== --- llvm/lib/ProfileData/SampleProfReader.cpp +++ llvm/lib/ProfileData/SampleProfReader.cpp @@ -354,6 +354,34 @@ return NameTable[*Idx]; } +ErrorOr SampleProfileReaderExtBinaryBase::readStringFromTable() { + if (!FixedLengthMD5) + return SampleProfileReaderBinary::readStringFromTable(); + + // read NameTable index. + auto Idx = readStringIndex(NameTable); + if (std::error_code EC = Idx.getError()) + return EC; + + // Check whether the name to be accessed has been accessed before, + // if not, read it from memory directly. + StringRef &SR = NameTable[*Idx]; + if (SR.empty()) { + const uint8_t *SavedData = Data; + Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t)); + auto FID = readUnencodedNumber(); + if (std::error_code EC = FID.getError()) + return EC; + // Save the string converted from uint64_t in MD5StringBuf. All the + // references to the name are all StringRefs referring to the string + // in MD5StringBuf. 
+ MD5StringBuf->push_back(std::to_string(*FID)); + SR = MD5StringBuf->back(); + Data = SavedData; + } + return SR; +} + ErrorOr SampleProfileReaderCompactBinary::readStringFromTable() { auto Idx = readStringIndex(NameTable); if (std::error_code EC = Idx.getError()) @@ -481,11 +509,16 @@ if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial)) Summary->setPartialProfile(true); break; - case SecNameTable: - if (std::error_code EC = readNameTableSec( - hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))) + case SecNameTable: { + FixedLengthMD5 = + hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5); + bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name); + assert((!FixedLengthMD5 || UseMD5) && + "If FixedLengthMD5 is true, UseMD5 has to be true"); + if (std::error_code EC = readNameTableSec(UseMD5)) return EC; break; + } case SecLBRProfile: if (std::error_code EC = readFuncProfiles()) return EC; @@ -726,9 +759,19 @@ auto Size = readNumber(); if (std::error_code EC = Size.getError()) return EC; - NameTable.reserve(*Size); MD5StringBuf = std::make_unique>(); MD5StringBuf->reserve(*Size); + if (FixedLengthMD5) { + // Preallocate and initialize NameTable so we can check whether a name + // index has been read before by checking whether the element in the + // NameTable is empty, meanwhile readStringIndex can do the boundary + // check using the size of NameTable. 
+ NameTable.resize(*Size + NameTable.size()); + + MD5NameMemStart = Data; + Data = Data + (*Size) * sizeof(uint64_t); + return sampleprof_error::success; + } for (uint32_t I = 0; I < *Size; ++I) { auto FID = readNumber(); if (std::error_code EC = FID.getError()) @@ -844,7 +887,9 @@ switch (Entry.Type) { case SecNameTable: - if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name)) + if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5)) + Flags.append("fixlenmd5,"); + else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name)) Flags.append("md5,"); break; case SecProfSummary: Index: llvm/lib/ProfileData/SampleProfWriter.cpp =================================================================== --- llvm/lib/ProfileData/SampleProfWriter.cpp +++ llvm/lib/ProfileData/SampleProfWriter.cpp @@ -174,11 +174,13 @@ std::set V; stablizeNameTable(V); - // Write out the name table. + // Write out the MD5 name table. We write unencoded MD5 so the reader can + // retrieve the name using the name index without having to read the + // whole name table. 
encodeULEB128(NameTable.size(), OS); - for (auto N : V) { - encodeULEB128(MD5Hash(N), OS); - } + support::endian::Writer Writer(OS, support::little); + for (auto N : V) + Writer.write(MD5Hash(N)); return sampleprof_error::success; } Index: llvm/test/Transforms/SampleProfile/profile-format.ll =================================================================== --- llvm/test/Transforms/SampleProfile/profile-format.ll +++ llvm/test/Transforms/SampleProfile/profile-format.ll @@ -6,6 +6,8 @@ ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.extbinary.afdo -S | FileCheck %s ; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.md5extbinary.afdo -S | FileCheck %s ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.md5extbinary.afdo -S | FileCheck %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.fixlenmd5.extbinary.afdo -S | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.fixlenmd5.extbinary.afdo -S | FileCheck %s ; Original C++ test case ; Index: llvm/test/tools/llvm-profdata/show-prof-info.test =================================================================== --- llvm/test/tools/llvm-profdata/show-prof-info.test +++ llvm/test/tools/llvm-profdata/show-prof-info.test @@ -7,6 +7,6 @@ ; To check llvm-profdata shows the correct flags for ProfileSummarySection. ; CHECK: ProfileSummarySection {{.*}} Flags: {compressed,partial} ; To check llvm-profdata shows the correct flags for NameTableSection. -; CHECK: NameTableSection {{.*}} Flags: {compressed,md5} +; CHECK: NameTableSection {{.*}} Flags: {compressed,fixlenmd5} ; To check llvm-profdata shows the correct file size. ; CHECK: [[FILESIZE]]