diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -165,7 +165,10 @@ // a new check in verifySecFlag. enum class SecNameTableFlags : uint32_t { SecFlagInValid = 0, - SecFlagMD5Name = (1 << 0) + SecFlagMD5Name = (1 << 0), + // Store MD5 in fixed length instead of ULEB128 so NameTable can be + // accessed like an array. + SecFlagFixedLengthMD5 = (1 << 1) }; enum class SecProfSummaryFlags : uint32_t { SecFlagInValid = 0, diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -618,6 +618,7 @@ const SecHdrTableEntry &Entry); // placeholder for subclasses to dispatch their own section readers. virtual std::error_code readCustomSection(const SecHdrTableEntry &Entry) = 0; + virtual ErrorOr readStringFromTable() override; std::unique_ptr ProfSymList; @@ -629,6 +630,12 @@ /// Use all functions from the input profile. bool UseAllFuncs = true; + /// Use fixed length MD5 instead of ULEB128 encoding so NameTable doesn't + /// need to be read in up front and can be directly accessed using index. + bool FixedLengthMD5 = false; + /// The starting address of NameTable containing fixed length MD5. + const uint8_t *MD5NameMemStart = nullptr; + /// If MD5 is used in NameTable section, the section saves uint64_t data. /// The uint64_t data has to be converted to a string and then the string /// will be used to initialize StringRef in NameTable. @@ -656,10 +663,7 @@ void collectFuncsFrom(const Module &M) override; /// Return whether names in the profile are all MD5 numbers. 
- virtual bool useMD5() override { - assert(!NameTable.empty() && "NameTable should have been initialized"); - return MD5StringBuf && !MD5StringBuf->empty(); - } + virtual bool useMD5() override { return MD5StringBuf.get(); } virtual std::unique_ptr getProfileSymbolList() override { return std::move(ProfSymList); diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h --- a/llvm/include/llvm/ProfileData/SampleProfWriter.h +++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h @@ -158,6 +158,9 @@ virtual void setUseMD5() override { UseMD5 = true; addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagMD5Name); + // MD5 will be stored as plain uint64_t instead of variable-length + // quantity format in NameTable section. + addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagFixedLengthMD5); } // Set the profile to be partial. It means the profile is for diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -367,6 +367,34 @@ return NameTable[*Idx]; } +ErrorOr SampleProfileReaderExtBinaryBase::readStringFromTable() { + if (!FixedLengthMD5) + return SampleProfileReaderBinary::readStringFromTable(); + + // read NameTable index. + auto Idx = readStringIndex(NameTable); + if (std::error_code EC = Idx.getError()) + return EC; + + // Check whether the name to be accessed has been accessed before, + // if not, read it from memory directly. + StringRef &SR = NameTable[*Idx]; + if (SR.empty()) { + const uint8_t *SavedData = Data; + Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t)); + auto FID = readUnencodedNumber(); + if (std::error_code EC = FID.getError()) + return EC; + // Save the string converted from uint64_t in MD5StringBuf. All the + // references to the name are StringRefs referring to the string + // in MD5StringBuf. 
+ MD5StringBuf->push_back(std::to_string(*FID)); + SR = MD5StringBuf->back(); + Data = SavedData; + } + return SR; +} + ErrorOr SampleProfileReaderCompactBinary::readStringFromTable() { auto Idx = readStringIndex(NameTable); if (std::error_code EC = Idx.getError()) @@ -494,11 +522,16 @@ if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial)) Summary->setPartialProfile(true); break; - case SecNameTable: - if (std::error_code EC = readNameTableSec( - hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))) + case SecNameTable: { + FixedLengthMD5 = + hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5); + bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name); + assert((!FixedLengthMD5 || UseMD5) && + "If FixedLengthMD5 is true, UseMD5 has to be true"); + if (std::error_code EC = readNameTableSec(UseMD5)) return EC; break; + } case SecLBRProfile: if (std::error_code EC = readFuncProfiles()) return EC; @@ -739,9 +772,20 @@ auto Size = readNumber(); if (std::error_code EC = Size.getError()) return EC; - NameTable.reserve(*Size); MD5StringBuf = std::make_unique>(); MD5StringBuf->reserve(*Size); + if (FixedLengthMD5) { + // Preallocate and initialize NameTable so we can check whether a name + // index has been read before by checking whether the element in the + // NameTable is empty, meanwhile readStringIndex can do the boundary + // check using the size of NameTable. 
+ NameTable.resize(*Size + NameTable.size()); + + MD5NameMemStart = Data; + Data = Data + (*Size) * sizeof(uint64_t); + return sampleprof_error::success; + } + NameTable.reserve(*Size); for (uint32_t I = 0; I < *Size; ++I) { auto FID = readNumber(); if (std::error_code EC = FID.getError()) @@ -857,7 +901,9 @@ switch (Entry.Type) { case SecNameTable: - if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name)) + if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5)) + Flags.append("fixlenmd5,"); + else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name)) Flags.append("md5,"); break; case SecProfSummary: diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -174,11 +174,13 @@ std::set V; stablizeNameTable(V); - // Write out the name table. + // Write out the MD5 name table. We write unencoded MD5 so the reader can + // retrieve the name using the name index without having to read the + // whole name table. encodeULEB128(NameTable.size(), OS); - for (auto N : V) { - encodeULEB128(MD5Hash(N), OS); - } + support::endian::Writer Writer(OS, support::little); + for (auto N : V) + Writer.write(MD5Hash(N)); return sampleprof_error::success; } diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline.fixlenmd5.extbinary.afdo b/llvm/test/Transforms/SampleProfile/Inputs/inline.fixlenmd5.extbinary.afdo new file mode 100644 index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@