diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -493,7 +493,11 @@ virtual bool dumpSectionInfo(raw_ostream &OS = dbgs()) { return false; }; /// Return whether names in the profile are all MD5 numbers. - virtual bool useMD5() { return false; } + bool useMD5() const { return ProfileIsMD5; } + + /// Force the profile to use MD5 in Sample contexts, even if function names + /// are present. + virtual void setProfileUseMD5() { ProfileIsMD5 = true; } /// Don't read profile without context if the flag is set. This is only meaningful /// for ExtBinary format. @@ -563,6 +567,10 @@ /// Zero out the discriminator bits higher than bit MaskedBitFrom (0 based). /// The default is to keep all the bits. uint32_t MaskedBitFrom = 31; + + /// Whether the profile uses MD5 for Sample Contexts and function names. This + /// can be one-way overriden by the user to force use MD5. + bool ProfileIsMD5 = false; }; class SampleProfileReaderText : public SampleProfileReader { @@ -579,6 +587,9 @@ /// Return true if \p Buffer is in the format supported by this class. static bool hasFormat(const MemoryBuffer &Buffer); + /// Text format sample profile does not support MD5 for now. + void setProfileUseMD5() override {} + private: /// CSNameTable is used to save full context vectors. This serves as an /// underlying immutable buffer for all clients. @@ -641,7 +652,10 @@ std::error_code readSummary(); /// Read the whole name table. - virtual std::error_code readNameTable(); + std::error_code readNameTable(); + + /// Read a string indirectly via the name table. + ErrorOr readStringFromTable(); /// Points to the current location in the buffer. const uint8_t *Data = nullptr; @@ -652,8 +666,18 @@ /// Function name table. std::vector NameTable; - /// Read a string indirectly via the name table. - virtual ErrorOr readStringFromTable(); + /// If MD5 is used in NameTable section, the section saves uint64_t data. + /// The uint64_t data has to be converted to a string and then the string + /// will be used to initialize StringRef in NameTable. + /// Note NameTable contains StringRef so it needs another buffer to own + /// the string data. MD5StringBuf serves as the string buffer that is + /// referenced by NameTable (vector of StringRef). We make sure + /// the lifetime of MD5StringBuf is not shorter than that of NameTable. + std::vector MD5StringBuf; + + /// The starting address of NameTable containing fixed length MD5. + const uint8_t *MD5NameMemStart = nullptr; + virtual ErrorOr readSampleContextFromTable(); private: @@ -712,8 +736,7 @@ FunctionSamples *FProfile); std::error_code readFuncOffsetTable(); std::error_code readFuncProfiles(); - std::error_code readMD5NameTable(); - std::error_code readNameTableSec(bool IsMD5); + std::error_code readNameTableSec(bool IsMD5, bool FixedLengthMD5); std::error_code readCSNameTableSec(); std::error_code readProfileSymbolList(); @@ -723,7 +746,6 @@ const SecHdrTableEntry &Entry); // placeholder for subclasses to dispatch their own section readers. virtual std::error_code readCustomSection(const SecHdrTableEntry &Entry) = 0; - ErrorOr readStringFromTable() override; ErrorOr readSampleContextFromTable() override; ErrorOr readContextFromTable(); @@ -740,21 +762,6 @@ /// The set containing the functions to use when compiling a module. DenseSet FuncsToUse; - /// Use fixed length MD5 instead of ULEB128 encoding so NameTable doesn't - /// need to be read in up front and can be directly accessed using index. - bool FixedLengthMD5 = false; - /// The starting address of NameTable containing fixed length MD5. - const uint8_t *MD5NameMemStart = nullptr; - - /// If MD5 is used in NameTable section, the section saves uint64_t data. - /// The uint64_t data has to be converted to a string and then the string - /// will be used to initialize StringRef in NameTable. - /// Note NameTable contains StringRef so it needs another buffer to own - /// the string data. MD5StringBuf serves as the string buffer that is - /// referenced by NameTable (vector of StringRef). We make sure - /// the lifetime of MD5StringBuf is not shorter than that of NameTable. - std::unique_ptr> MD5StringBuf; - /// CSNameTable is used to save full context vectors. This serves as an /// underlying immutable buffer for all clients. std::unique_ptr> CSNameTable; @@ -783,9 +790,6 @@ /// the reader has been given a module. bool collectFuncsFromModule() override; - /// Return whether names in the profile are all MD5 numbers. - bool useMD5() override { return MD5StringBuf.get(); } - std::unique_ptr getProfileSymbolList() override { return std::move(ProfSymList); }; diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -530,7 +530,20 @@ if (std::error_code EC = Idx.getError()) return EC; - return NameTable[*Idx]; + // Lazy loading, if the string has not been materialized from memory storing + // MD5 values, then it is default initialized with the null pointer. This can + // only happen when using fixed length MD5, that bounds check is performed + // while parsing the name table to ensure MD5NameMemStart points to an array + // with enough MD5 entries. + StringRef &SR = NameTable[*Idx]; + if (!SR.data()) { + assert(MD5NameMemStart); + using namespace support; + uint64_t FID = endian::read( + MD5NameMemStart + (*Idx) * sizeof(uint64_t)); + SR = MD5StringBuf.emplace_back(std::to_string(FID)); + } + return SR; } ErrorOr SampleProfileReaderBinary::readSampleContextFromTable() { @@ -540,34 +553,6 @@ return SampleContext(*FName); } -ErrorOr SampleProfileReaderExtBinaryBase::readStringFromTable() { - if (!FixedLengthMD5) - return SampleProfileReaderBinary::readStringFromTable(); - - // read NameTable index. - auto Idx = readStringIndex(NameTable); - if (std::error_code EC = Idx.getError()) - return EC; - - // Check whether the name to be accessed has been accessed before, - // if not, read it from memory directly. - StringRef &SR = NameTable[*Idx]; - if (SR.empty()) { - const uint8_t *SavedData = Data; - Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t)); - auto FID = readUnencodedNumber(); - if (std::error_code EC = FID.getError()) - return EC; - // Save the string converted from uint64_t in MD5StringBuf. All the - // references to the name are all StringRefs refering to the string - // in MD5StringBuf. - MD5StringBuf->push_back(std::to_string(*FID)); - SR = MD5StringBuf->back(); - Data = SavedData; - } - return SR; -} - std::error_code SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { auto NumSamples = readNumber(); @@ -729,14 +714,15 @@ FunctionSamples::ProfileIsFS = ProfileIsFS = true; break; case SecNameTable: { - FixedLengthMD5 = + bool FixedLengthMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5); bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name); - assert((!FixedLengthMD5 || UseMD5) && - "If FixedLengthMD5 is true, UseMD5 has to be true"); + // UseMD5 means if THIS section uses MD5, ProfileIsMD5 means if the entire + // profile uses MD5 for function name matching in IPO passes. + ProfileIsMD5 = ProfileIsMD5 || UseMD5; FunctionSamples::HasUniqSuffix = hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix); - if (std::error_code EC = readNameTableSec(UseMD5)) + if (std::error_code EC = readNameTableSec(UseMD5, FixedLengthMD5)) return EC; break; } @@ -1020,50 +1006,77 @@ auto Size = readNumber(); if (std::error_code EC = Size.getError()) return EC; - NameTable.reserve(*Size + NameTable.size()); + + // Normally if useMD5 is true, the name table should have MD5 values, not + // strings, however in the case that ExtBinary profile has multiple name + // tables mixing string and MD5, all of them have to be normalized to use MD5, + // because optimization passes can only handle either type. + bool UseMD5 = useMD5(); + if (UseMD5) + MD5StringBuf.reserve(MD5StringBuf.size() + *Size); + + NameTable.clear(); + NameTable.reserve(*Size); for (size_t I = 0; I < *Size; ++I) { auto Name(readString()); if (std::error_code EC = Name.getError()) return EC; - NameTable.push_back(*Name); + if (UseMD5) { + uint64_t FID = MD5Hash(*Name); + NameTable.emplace_back(MD5StringBuf.emplace_back(std::to_string(FID))); + } else + NameTable.push_back(*Name); } return sampleprof_error::success; } -std::error_code SampleProfileReaderExtBinaryBase::readMD5NameTable() { - auto Size = readNumber(); - if (std::error_code EC = Size.getError()) - return EC; - MD5StringBuf = std::make_unique>(); - MD5StringBuf->reserve(*Size); +std::error_code +SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5, + bool FixedLengthMD5) { if (FixedLengthMD5) { + if (IsMD5) + errs() << "If FixedLengthMD5 is true, UseMD5 has to be true"; + auto Size = readNumber(); + if (std::error_code EC = Size.getError()) + return EC; + + assert(Data + (*Size) * sizeof(uint64_t) == End && + "Fixed length MD5 name table does not contain specified number of " + "entries"); + if (Data + (*Size) * sizeof(uint64_t) > End) + return sampleprof_error::truncated; + // Preallocate and initialize NameTable so we can check whether a name // index has been read before by checking whether the element in the // NameTable is empty, meanwhile readStringIndex can do the boundary // check using the size of NameTable. - NameTable.resize(*Size + NameTable.size()); - + MD5StringBuf.reserve(MD5StringBuf.size() + *Size); + NameTable.clear(); + NameTable.resize(*Size); MD5NameMemStart = Data; Data = Data + (*Size) * sizeof(uint64_t); return sampleprof_error::success; } - NameTable.reserve(*Size); - for (uint64_t I = 0; I < *Size; ++I) { - auto FID = readNumber(); - if (std::error_code EC = FID.getError()) + + if (IsMD5) { + assert(!FixedLengthMD5 && "FixedLengthMD5 should be unreachable here"); + auto Size = readNumber(); + if (std::error_code EC = Size.getError()) return EC; - MD5StringBuf->push_back(std::to_string(*FID)); - // NameTable is a vector of StringRef. Here it is pushing back a - // StringRef initialized with the last string in MD5stringBuf. - NameTable.push_back(MD5StringBuf->back()); + + MD5StringBuf.reserve(MD5StringBuf.size() + *Size); + NameTable.clear(); + NameTable.reserve(*Size); + for (size_t I = 0; I < *Size; ++I) { + auto FID = readNumber(); + if (std::error_code EC = FID.getError()) + return EC; + NameTable.emplace_back(MD5StringBuf.emplace_back(std::to_string(*FID))); + } + return sampleprof_error::success; } - return sampleprof_error::success; -} -std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) { - if (IsMD5) - return readMD5NameTable(); return SampleProfileReaderBinary::readNameTable(); } diff --git a/llvm/test/tools/llvm-profdata/Inputs/sample-multiple-nametables.profdata b/llvm/test/tools/llvm-profdata/Inputs/sample-multiple-nametables.profdata new file mode 100644 index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@