diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -493,7 +493,11 @@ virtual bool dumpSectionInfo(raw_ostream &OS = dbgs()) { return false; }; /// Return whether names in the profile are all MD5 numbers. - virtual bool useMD5() { return false; } + bool useMD5() { return ProfileIsMD5; } + + /// Force the profile to use MD5 in Sample contexts, even if function names + /// are present. + virtual void setProfileUseMD5() { ProfileIsMD5 = true; } /// Don't read profile without context if the flag is set. This is only meaningful /// for ExtBinary format. @@ -563,6 +567,10 @@ /// Zero out the discriminator bits higher than bit MaskedBitFrom (0 based). /// The default is to keep all the bits. uint32_t MaskedBitFrom = 31; + + /// Whether the profile uses MD5 for Sample Contexts for function names. This + /// can be one-way overridden by the user to force the use of MD5. + bool ProfileIsMD5 = false; }; class SampleProfileReaderText : public SampleProfileReader { @@ -579,6 +587,10 @@ /// Return true if \p Buffer is in the format supported by this class. static bool hasFormat(const MemoryBuffer &Buffer); + void setProfileUseMD5() override { + assert(false && "Text format sample profile does not support MD5"); + } + private: /// CSNameTable is used to save full context vectors. This serves as an /// underlying immutable buffer for all clients. @@ -640,8 +652,11 @@ /// Read profile summary. std::error_code readSummary(); - /// Read the whole name table. - virtual std::error_code readNameTable(); + /// Read the whole name table with regular strings. + std::error_code readNameTable(); + + /// Read the whole name table with ULEB128 encoded MD5 values. + std::error_code readMD5NameTable(); /// Points to the current location in the buffer. const uint8_t *Data = nullptr; @@ -652,8 +667,20 @@ /// Function name table. 
std::vector NameTable; + /// If MD5 is used in NameTable section, the section saves uint64_t data. + /// The uint64_t data has to be converted to a string and then the string + /// will be used to initialize StringRef in NameTable. + /// Note NameTable contains StringRef so it needs another buffer to own + /// the string data. MD5StringBuf serves as the string buffer that is + /// referenced by NameTable (vector of StringRef). We make sure + /// the lifetime of MD5StringBuf is not shorter than that of NameTable. + std::vector MD5StringBuf; + + /// The starting address of NameTable containing fixed length MD5. + const uint8_t *MD5NameMemStart = nullptr; + /// Read a string indirectly via the name table. - virtual ErrorOr readStringFromTable(); + ErrorOr readStringFromTable(); virtual ErrorOr readSampleContextFromTable(); private: @@ -722,7 +749,6 @@ const SecHdrTableEntry &Entry); // placeholder for subclasses to dispatch their own section readers. virtual std::error_code readCustomSection(const SecHdrTableEntry &Entry) = 0; - ErrorOr readStringFromTable() override; ErrorOr readSampleContextFromTable() override; ErrorOr readContextFromTable(); @@ -739,18 +765,6 @@ /// The set containing the functions to use when compiling a module. DenseSet FuncsToUse; - /// The starting address of NameTable containing fixed length MD5. - const uint8_t *MD5NameMemStart = nullptr; - - /// If MD5 is used in NameTable section, the section saves uint64_t data. - /// The uint64_t data has to be converted to a string and then the string - /// will be used to initialize StringRef in NameTable. - /// Note NameTable contains StringRef so it needs another buffer to own - /// the string data. MD5StringBuf serves as the string buffer that is - /// referenced by NameTable (vector of StringRef). We make sure - /// the lifetime of MD5StringBuf is not shorter than that of NameTable. - std::unique_ptr> MD5StringBuf; - /// CSNameTable is used to save full context vectors. 
This serves as an /// underlying immutable buffer for all clients. std::unique_ptr> CSNameTable; @@ -779,9 +793,6 @@ /// the reader has been given a module. bool collectFuncsFromModule() override; - /// Return whether names in the profile are all MD5 numbers. - bool useMD5() override { return MD5StringBuf.get(); } - std::unique_ptr getProfileSymbolList() override { return std::move(ProfSymList); }; @@ -809,24 +820,21 @@ class SampleProfileReaderCompactBinary : public SampleProfileReaderBinary { private: - /// Function name table. - std::vector NameTable; /// The table mapping from function name to the offset of its FunctionSample /// towards file start. DenseMap FuncOffsetTable; /// The set containing the functions to use when compiling a module. DenseSet FuncsToUse; std::error_code verifySPMagic(uint64_t Magic) override; - std::error_code readNameTable() override; - /// Read a string indirectly via the name table. - ErrorOr readStringFromTable() override; std::error_code readHeader() override; std::error_code readFuncOffsetTable(); public: SampleProfileReaderCompactBinary(std::unique_ptr B, LLVMContext &C) - : SampleProfileReaderBinary(std::move(B), C, SPF_Compact_Binary) {} + : SampleProfileReaderBinary(std::move(B), C, SPF_Compact_Binary) { + ProfileIsMD5 = true; + } /// \brief Return true if \p Buffer is in the format supported by this class. static bool hasFormat(const MemoryBuffer &Buffer); @@ -837,9 +845,6 @@ /// Collect functions with definitions in Module M. Return true if /// the reader has been given a module. bool collectFuncsFromModule() override; - - /// Return whether names in the profile are all MD5 numbers. 
- bool useMD5() override { return true; } }; using InlineCallStack = SmallVector; diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -530,7 +530,18 @@ if (std::error_code EC = Idx.getError()) return EC; - return NameTable[*Idx]; + // Check whether the name to be accessed has been accessed before, + // if not, read it from memory directly. + StringRef &SR = NameTable[*Idx]; + if (!SR.data()) { + assert(MD5NameMemStart); + using namespace support; + uint64_t FID = endian::read( + reinterpret_cast(MD5NameMemStart + + (*Idx) * sizeof(uint64_t))); + SR = MD5StringBuf.emplace_back(std::to_string(FID)); + } + return SR; } ErrorOr SampleProfileReaderBinary::readSampleContextFromTable() { @@ -540,44 +551,6 @@ return SampleContext(*FName); } -ErrorOr SampleProfileReaderExtBinaryBase::readStringFromTable() { - // read NameTable index. - auto Idx = readStringIndex(NameTable); - if (std::error_code EC = Idx.getError()) - return EC; - - // Check whether the name to be accessed has been accessed before, - // if not, read it from memory directly. - StringRef &SR = NameTable[*Idx]; - if (!SR.data()) { - assert(MD5NameMemStart); - const uint8_t *SavedData = Data; - const uint8_t *SavedEnd = End; - Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t)); - End = reinterpret_cast( - std::numeric_limits::max()); - auto FID = readUnencodedNumber(); - if (std::error_code EC = FID.getError()) - return EC; - // Save the string converted from uint64_t in MD5StringBuf. All the - // references to the name are all StringRefs refering to the string - // in MD5StringBuf. 
- MD5StringBuf->push_back(std::to_string(*FID)); - SR = MD5StringBuf->back(); - Data = SavedData; - End = SavedEnd; - } - return SR; -} - -ErrorOr SampleProfileReaderCompactBinary::readStringFromTable() { - auto Idx = readStringIndex(NameTable); - if (std::error_code EC = Idx.getError()) - return EC; - - return StringRef(NameTable[*Idx]); -} - std::error_code SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { auto NumSamples = readNumber(); @@ -1072,57 +1045,80 @@ auto Size = readNumber(); if (std::error_code EC = Size.getError()) return EC; + + // Normally if useMD5 is true, the name table should have MD5 values, not + // strings, however in the case that ExtBinary profile has multiple name + // tables mixing string and MD5, all of them have to be normalized to use MD5, + // because optimization passes can only handle either type. + bool UseMD5 = useMD5(); + if (UseMD5) + MD5StringBuf.reserve(MD5StringBuf.size() + *Size); + NameTable.clear(); NameTable.reserve(*Size); for (size_t I = 0; I < *Size; ++I) { auto Name(readString()); if (std::error_code EC = Name.getError()) return EC; - NameTable.push_back(*Name); + if (UseMD5) { + uint64_t MD5 = MD5Hash(*Name); + NameTable.emplace_back(MD5StringBuf.emplace_back(std::to_string(MD5))); + } else + NameTable.push_back(*Name); } return sampleprof_error::success; } +std::error_code SampleProfileReaderBinary::readMD5NameTable() { + auto Size = readNumber(); + if (std::error_code EC = Size.getError()) + return EC; + + MD5StringBuf.reserve(MD5StringBuf.size() + *Size); + NameTable.clear(); + NameTable.reserve(*Size); + for (size_t I = 0; I < *Size; ++I) { + auto FID = readNumber(); + if (std::error_code EC = FID.getError()) + return EC; + NameTable.emplace_back(MD5StringBuf.emplace_back(std::to_string(*FID))); + } + return sampleprof_error::success; +} + std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5, bool FixedLengthMD5) { - if (!IsMD5) - return 
SampleProfileReaderBinary::readNameTable(); + if (!FixedLengthMD5) { + if (!IsMD5) + return SampleProfileReaderBinary::readNameTable(); + else + return SampleProfileReaderBinary::readMD5NameTable(); + } auto Size = readNumber(); if (std::error_code EC = Size.getError()) return EC; - MD5StringBuf = std::make_unique>(); - MD5StringBuf->reserve(*Size); - if (FixedLengthMD5) { - assert(Data + (*Size) * sizeof(uint64_t) == End && - "Fixed length MD5 name table does not contain specified number of " - "entries"); - if (Data + (*Size) * sizeof(uint64_t) > End) - return sampleprof_error::truncated; - // Preallocate and initialize NameTable so we can check whether a name - // index has been read before by checking whether the element in the - // NameTable is empty, meanwhile readStringIndex can do the boundary - // check using the size of NameTable. - NameTable.resize(*Size); + assert(Data + (*Size) * sizeof(uint64_t) == End && + "Fixed length MD5 name table does not contain specified number of " + "entries"); + if (Data + (*Size) * sizeof(uint64_t) > End) + return sampleprof_error::truncated; - MD5NameMemStart = Data; - Data = Data + (*Size) * sizeof(uint64_t); - return sampleprof_error::success; - } + // Preallocate and initialize NameTable so we can check whether a name + // index has been read before by checking whether the element in the + // NameTable is empty, meanwhile readStringIndex can do the boundary + // check using the size of NameTable. NameTable.clear(); - NameTable.reserve(*Size); - for (uint64_t I = 0; I < *Size; ++I) { - auto FID = readNumber(); - if (std::error_code EC = FID.getError()) - return EC; - MD5StringBuf->push_back(std::to_string(*FID)); - // NameTable is a vector of StringRef. Here it is pushing back a - // StringRef initialized with the last string in MD5stringBuf. 
- NameTable.push_back(MD5StringBuf->back()); - } + // Reserve room for every entry up front: readStringFromTable() appends to + // MD5StringBuf lazily, and a reallocation there would invalidate the + // StringRefs already stored in NameTable. + MD5StringBuf.reserve(MD5StringBuf.size() + *Size); + NameTable.resize(*Size); + MD5NameMemStart = Data; + Data = Data + (*Size) * sizeof(uint64_t); return sampleprof_error::success; } @@ -1244,21 +1240,6 @@ return sampleprof_error::success; } -std::error_code SampleProfileReaderCompactBinary::readNameTable() { - auto Size = readNumber(); - if (std::error_code EC = Size.getError()) - return EC; - NameTable.clear(); - NameTable.reserve(*Size); - for (uint64_t I = 0; I < *Size; ++I) { - auto FID = readNumber(); - if (std::error_code EC = FID.getError()) - return EC; - NameTable.push_back(std::to_string(*FID)); - } - return sampleprof_error::success; -} - std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint64_t Idx) { SecHdrTableEntry Entry; @@ -1311,26 +1292,23 @@ if (std::error_code EC = readSecHdrTable()) return EC; -#ifndef NDEBUG - bool HasMD5 = false; bool HasNonMD5 = false; for (auto &Entry : SecHdrTable) { if (Entry.Size && Entry.Type == SecNameTable) { bool IsMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name) || hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5); if (IsMD5) { - HasMD5 = true; + ProfileIsMD5 = true; } else HasNonMD5 = true; } } - if (HasMD5 != HasNonMD5) { - assert(!(HasMD5 && HasNonMD5) && + if (ProfileIsMD5 != HasNonMD5) { + assert(!(ProfileIsMD5 && HasNonMD5) && "Profile contains both MD5 and non-MD5 function names, non-MD5 " "function names will be dropped"); - assert((HasMD5 || HasNonMD5) && "Profile contains no name table"); + assert((ProfileIsMD5 || HasNonMD5) && "Profile contains no name table"); } -#endif return sampleprof_error::success; } @@ -1460,9 +1438,20 @@ } std::error_code SampleProfileReaderCompactBinary::readHeader() { - 
SampleProfileReaderBinary::readHeader(); - if (std::error_code EC = readFuncOffsetTable()) + Data = reinterpret_cast(Buffer->getBufferStart()); + End = Data + Buffer->getBufferSize(); + + if (std::error_code EC = readMagicIdent()) return EC; + + if (std::error_code EC = 
readSummary()) + return EC; + + if (std::error_code EC = readMD5NameTable()) + return EC; + + if (std::error_code EC = readFuncOffsetTable()) + return EC; return sampleprof_error::success; }