Index: llvm/include/llvm/ProfileData/SampleProf.h =================================================================== --- llvm/include/llvm/ProfileData/SampleProf.h +++ llvm/include/llvm/ProfileData/SampleProf.h @@ -101,14 +101,14 @@ uint64_t('2') << (64 - 56) | uint64_t(Format); } -// Get the proper representation of a string in the input Format. -static inline StringRef getRepInFormat(StringRef Name, - SampleProfileFormat Format, +/// Get the proper representation of a string according to whether the +/// current Format uses MD5 to represent the string. +static inline StringRef getRepInFormat(StringRef Name, bool UseMD5, std::string &GUIDBuf) { if (Name.empty()) return Name; GUIDBuf = std::to_string(Function::getGUID(Name)); - return (Format == SPF_Compact_Binary) ? StringRef(GUIDBuf) : Name; + return UseMD5 ? StringRef(GUIDBuf) : Name; } static inline uint64_t SPVersion() { return 103; } @@ -154,7 +154,7 @@ uint64_t Size; }; -enum SecFlags { SecFlagInValid = 0, SecFlagCompress = (1 << 0) }; +enum SecFlags { SecFlagInValid = 0, SecFlagCompress = (1 << 0), SecFlagMD5Name = (1 << 1) }; static inline void addSecFlags(SecHdrTableEntry &Entry, uint64_t Flags) { Entry.Flags |= Flags; @@ -164,7 +164,7 @@ Entry.Flags &= ~Flags; } -static inline bool hasSecFlag(SecHdrTableEntry &Entry, SecFlags Flag) { +static inline bool hasSecFlag(const SecHdrTableEntry &Entry, SecFlags Flag) { return Entry.Flags & Flag; } @@ -379,7 +379,7 @@ const FunctionSamples *findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName) const { std::string CalleeGUID; - CalleeName = getRepInFormat(CalleeName, Format, CalleeGUID); + CalleeName = getRepInFormat(CalleeName, UseMD5, CalleeGUID); auto iter = CallsiteSamples.find(Loc); if (iter == CallsiteSamples.end()) @@ -527,13 +527,13 @@ } /// Translate \p Name into its original name in Module. - /// When the Format is not SPF_Compact_Binary, \p Name needs no translation. 
- /// When the Format is SPF_Compact_Binary, \p Name in current FunctionSamples + /// When profile doesn't use MD5, \p Name needs no translation. + /// When profile uses MD5, \p Name in current FunctionSamples /// is actually GUID of the original function name. getNameInModule will /// translate \p Name in current FunctionSamples into its original name. /// If the original name doesn't exist in \p M, return empty StringRef. StringRef getNameInModule(StringRef Name, const Module *M) const { - if (Format != SPF_Compact_Binary) + if (!UseMD5) return Name; assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be popluated first"); @@ -560,16 +560,18 @@ static SampleProfileFormat Format; + /// Whether the profile uses MD5 to represent string. + static bool UseMD5; + /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for /// all the function symbols defined or declared in current module. DenseMap *GUIDToFuncNameMap = nullptr; // Assume the input \p Name is a name coming from FunctionSamples itself. - // If the format is SPF_Compact_Binary, the name is already a GUID and we + // If UseMD5 is true, the name is already a GUID and we // don't want to return the GUID of GUID. static uint64_t getGUID(StringRef Name) { - return (Format == SPF_Compact_Binary) ? std::stoull(Name.data()) - : Function::getGUID(Name); + return UseMD5 ? 
std::stoull(Name.data()) : Function::getGUID(Name); } private: Index: llvm/include/llvm/ProfileData/SampleProfReader.h =================================================================== --- llvm/include/llvm/ProfileData/SampleProfReader.h +++ llvm/include/llvm/ProfileData/SampleProfReader.h @@ -335,6 +335,7 @@ return EC; if (Remapper) Remapper->applyRemapping(Ctx); + FunctionSamples::UseMD5 = useMD5(); return sampleprof_error::success; } @@ -363,7 +364,7 @@ FunctionSamples *getOrCreateSamplesFor(const Function &F) { std::string FGUID; StringRef CanonName = FunctionSamples::getCanonicalFnName(F); - CanonName = getRepInFormat(CanonName, getFormat(), FGUID); + CanonName = getRepInFormat(CanonName, useMD5(), FGUID); return &Profiles[CanonName]; } @@ -374,7 +375,7 @@ return FS; } std::string FGUID; - Fname = getRepInFormat(Fname, getFormat(), FGUID); + Fname = getRepInFormat(Fname, useMD5(), FGUID); auto It = Profiles.find(Fname); if (It != Profiles.end()) return &It->second; @@ -419,6 +420,9 @@ virtual std::vector *getNameTable() { return nullptr; } virtual bool dumpSectionInfo(raw_ostream &OS = dbgs()) { return false; }; + /// Return whether names in the profile are all MD5 numbers. + virtual bool useMD5() { return false; } + protected: /// Map every function to its associated profile. 
/// @@ -590,7 +594,7 @@ virtual std::error_code readHeader() override; virtual std::error_code verifySPMagic(uint64_t Magic) override = 0; virtual std::error_code readOneSection(const uint8_t *Start, uint64_t Size, - SecType Type) = 0; + const SecHdrTableEntry &Entry) = 0; public: SampleProfileReaderExtBinaryBase(std::unique_ptr B, @@ -610,11 +614,14 @@ class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase { private: virtual std::error_code verifySPMagic(uint64_t Magic) override; - virtual std::error_code readOneSection(const uint8_t *Start, uint64_t Size, - SecType Type) override; + virtual std::error_code + readOneSection(const uint8_t *Start, uint64_t Size, + const SecHdrTableEntry &Entry) override; std::error_code readProfileSymbolList(); std::error_code readFuncOffsetTable(); std::error_code readFuncProfiles(); + std::error_code readMD5NameTable(); + std::error_code readNameTableSec(bool IsMD5); /// The table mapping from function name to the offset of its FunctionSample /// towards file start. @@ -624,6 +631,11 @@ /// Use all functions from the input profile. bool UseAllFuncs = true; + /// If MD5 is used in NameTable section, the section saves uint64_t data. + /// When reading NameTable, MD5Names is the place to save the strings + /// converted from uint64_t data. + std::unique_ptr> MD5Names; + public: SampleProfileReaderExtBinary(std::unique_ptr B, LLVMContext &C, SampleProfileFormat Format = SPF_Ext_Binary) @@ -638,6 +650,12 @@ /// Collect functions with definitions in Module \p M. void collectFuncsFrom(const Module &M) override; + + /// Return whether names in the profile are all MD5 numbers. + virtual bool useMD5() override { + assert(!NameTable.empty() && "NameTable should have been initialized"); + return MD5Names && !MD5Names->empty(); + } }; class SampleProfileReaderCompactBinary : public SampleProfileReaderBinary { @@ -671,6 +689,9 @@ /// Collect functions to be used when compiling Module \p M.
void collectFuncsFrom(const Module &M) override; + + /// Return whether names in the profile are all MD5 numbers. + virtual bool useMD5() override { return true; } }; using InlineCallStack = SmallVector; Index: llvm/include/llvm/ProfileData/SampleProfWriter.h =================================================================== --- llvm/include/llvm/ProfileData/SampleProfWriter.h +++ llvm/include/llvm/ProfileData/SampleProfWriter.h @@ -153,6 +153,7 @@ protected: uint64_t markSectionStart(SecType Type); std::error_code addNewSection(SecType Sec, uint64_t SectionStart); + void addSectionFlags(SecType Type, SecFlags Flags); virtual void initSectionHdrLayout() = 0; virtual std::error_code writeSections(const StringMap &ProfileMap) = 0; @@ -168,7 +169,6 @@ std::error_code writeSecHdrTable(); virtual std::error_code writeHeader(const StringMap &ProfileMap) override; - void addSectionFlags(SecType Type, SecFlags Flags); SecHdrTableEntry &getEntryInLayout(SecType Type); std::error_code compressAndOutput(); @@ -202,6 +202,12 @@ ProfSymList = PSL; }; + // Set to use MD5 to represent string in NameTable. + void setUseMD5() { + UseMD5 = true; + addSectionFlags(SecNameTable, SecFlagMD5Name); + } + private: virtual void initSectionHdrLayout() override { // Note that SecFuncOffsetTable section is written after SecLBRProfile @@ -222,6 +228,10 @@ }; virtual std::error_code writeSections(const StringMap &ProfileMap) override; + + std::error_code writeFuncOffsetTable(); + virtual std::error_code writeNameTable() override; + ProfileSymbolList *ProfSymList = nullptr; // Save the start of SecLBRProfile so we can compute the offset to the @@ -231,7 +241,8 @@ // FuncOffsetTable maps function name to its profile offset in SecLBRProfile // section. It is used to load function profile on demand. MapVector FuncOffsetTable; - std::error_code writeFuncOffsetTable(); + // Whether to use MD5 to represent string.
+ bool UseMD5 = false; }; // CompactBinary is a compact format of binary profile which both reduces Index: llvm/lib/ProfileData/SampleProf.cpp =================================================================== --- llvm/lib/ProfileData/SampleProf.cpp +++ llvm/lib/ProfileData/SampleProf.cpp @@ -30,6 +30,7 @@ namespace llvm { namespace sampleprof { SampleProfileFormat FunctionSamples::Format; +bool FunctionSamples::UseMD5; } // namespace sampleprof } // namespace llvm Index: llvm/lib/ProfileData/SampleProfReader.cpp =================================================================== --- llvm/lib/ProfileData/SampleProfReader.cpp +++ llvm/lib/ProfileData/SampleProfReader.cpp @@ -470,18 +470,18 @@ return sampleprof_error::success; } -std::error_code -SampleProfileReaderExtBinary::readOneSection(const uint8_t *Start, - uint64_t Size, SecType Type) { +std::error_code SampleProfileReaderExtBinary::readOneSection( + const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) { Data = Start; End = Start + Size; - switch (Type) { + switch (Entry.Type) { case SecProfSummary: if (std::error_code EC = readSummary()) return EC; break; case SecNameTable: - if (std::error_code EC = readNameTable()) + if (std::error_code EC = + readNameTableSec(hasSecFlag(Entry, SecFlagMD5Name))) return EC; break; case SecLBRProfile: @@ -546,15 +546,28 @@ } } - for (auto NameOffset : FuncOffsetTable) { - auto FuncName = NameOffset.first; - if (!FuncsToUse.count(FuncName) && - (!Remapper || !Remapper->exist(FuncName))) - continue; - const uint8_t *FuncProfileAddr = Start + NameOffset.second; - assert(FuncProfileAddr < End && "out of LBRProfile section"); - if (std::error_code EC = readFuncProfile(FuncProfileAddr)) - return EC; + if (useMD5()) { + for (auto Name : FuncsToUse) { + auto GUID = std::to_string(MD5Hash(Name)); + auto iter = FuncOffsetTable.find(StringRef(GUID)); + if (iter == FuncOffsetTable.end()) + continue; + const uint8_t *FuncProfileAddr = Start + iter->second; + 
assert(FuncProfileAddr < End && "out of LBRProfile section"); + if (std::error_code EC = readFuncProfile(FuncProfileAddr)) + return EC; + } + } else { + for (auto NameOffset : FuncOffsetTable) { + auto FuncName = NameOffset.first; + if (!FuncsToUse.count(FuncName) && + (!Remapper || !Remapper->exist(FuncName))) + continue; + const uint8_t *FuncProfileAddr = Start + NameOffset.second; + assert(FuncProfileAddr < End && "out of LBRProfile section"); + if (std::error_code EC = readFuncProfile(FuncProfileAddr)) + return EC; + } } Data = End; @@ -628,7 +641,7 @@ SecSize = DecompressBufSize; } - if (std::error_code EC = readOneSection(SecStart, SecSize, Entry.Type)) + if (std::error_code EC = readOneSection(SecStart, SecSize, Entry)) return EC; if (Data != SecStart + SecSize) return sampleprof_error::malformed; @@ -705,6 +718,29 @@ return sampleprof_error::success; } +std::error_code SampleProfileReaderExtBinary::readMD5NameTable() { + auto Size = readNumber(); + if (std::error_code EC = Size.getError()) + return EC; + NameTable.reserve(*Size); + MD5Names = std::make_unique>(); + MD5Names->reserve(*Size); + for (uint32_t I = 0; I < *Size; ++I) { + auto FID = readNumber(); + if (std::error_code EC = FID.getError()) + return EC; + MD5Names->push_back(std::to_string(*FID)); + NameTable.push_back(MD5Names->back()); + } + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderExtBinary::readNameTableSec(bool IsMD5) { + if (IsMD5) + return readMD5NameTable(); + return SampleProfileReaderBinary::readNameTable(); +} + std::error_code SampleProfileReaderCompactBinary::readNameTable() { auto Size = readNumber(); if (std::error_code EC = Size.getError()) @@ -1210,9 +1246,9 @@ } void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) { - // If the reader is in compact format, we can't remap it because + // If the reader uses MD5 to represent string, we can't remap it because // we don't know what the original function names were. 
- if (Reader.getFormat() == SPF_Compact_Binary) { + if (Reader.useMD5()) { Ctx.diagnose(DiagnosticInfoSampleProfile( Reader.getBuffer()->getBufferIdentifier(), "Profile data remapping cannot be applied to profile data " Index: llvm/lib/ProfileData/SampleProfWriter.cpp =================================================================== --- llvm/lib/ProfileData/SampleProfWriter.cpp +++ llvm/lib/ProfileData/SampleProfWriter.cpp @@ -166,6 +166,22 @@ return sampleprof_error::success; } +std::error_code SampleProfileWriterExtBinary::writeNameTable() { + if (!UseMD5) + return SampleProfileWriterBinary::writeNameTable(); + + auto &OS = *OutputStream; + std::set V; + stablizeNameTable(V); + + // Write out the name table. + encodeULEB128(NameTable.size(), OS); + for (auto N : V) { + encodeULEB128(MD5Hash(N), OS); + } + return sampleprof_error::success; +} + std::error_code SampleProfileWriterExtBinary::writeSections( const StringMap &ProfileMap) { uint64_t SectionStart = markSectionStart(SecProfSummary); Index: llvm/lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- llvm/lib/Transforms/IPO/SampleProfile.cpp +++ llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -236,7 +236,7 @@ DenseMap &GUIDToFuncNameMap) : CurrentReader(Reader), CurrentModule(M), CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) { - if (CurrentReader.getFormat() != SPF_Compact_Binary) + if (!CurrentReader.useMD5()) return; for (const auto &F : CurrentModule) { @@ -262,7 +262,7 @@ } ~GUIDToFuncNameMapper() { - if (CurrentReader.getFormat() != SPF_Compact_Binary) + if (!CurrentReader.useMD5()) return; CurrentGUIDToFuncNameMap.clear(); Index: llvm/test/Transforms/SampleProfile/profile-format.ll =================================================================== --- llvm/test/Transforms/SampleProfile/profile-format.ll +++ llvm/test/Transforms/SampleProfile/profile-format.ll @@ -4,6 +4,8 @@ ; RUN: opt < %s -passes=sample-profile 
-sample-profile-file=%S/Inputs/inline.compactbinary.afdo -S | FileCheck %s ; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.extbinary.afdo -S | FileCheck %s ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.extbinary.afdo -S | FileCheck %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.md5extbinary.afdo -S | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.md5extbinary.afdo -S | FileCheck %s ; Original C++ test case ; Index: llvm/test/tools/llvm-profdata/roundtrip.test =================================================================== --- llvm/test/tools/llvm-profdata/roundtrip.test +++ llvm/test/tools/llvm-profdata/roundtrip.test @@ -16,3 +16,11 @@ RUN: llvm-profdata merge --sample --extbinary -output=%t.5.profdata %t.4.profdata RUN: llvm-profdata merge --sample --text -output=%t.4.proftext %t.5.profdata RUN: diff -b %t.4.proftext %S/Inputs/sample-profile.proftext +# Trip from text --> extbinary --> md5text +# Trip from text --> compbinary --> md5text +# Compare the two md5 texts +RUN: llvm-profdata merge --sample --compbinary -output=%t.6.profdata %S/Inputs/sample-profile.proftext +RUN: llvm-profdata merge --sample --text -output=%t.6.proftext %t.6.profdata +RUN: llvm-profdata merge --sample --extbinary -use-md5 -output=%t.7.profdata %S/Inputs/sample-profile.proftext +RUN: llvm-profdata merge --sample --text -output=%t.7.proftext %t.7.profdata +RUN: diff -b %t.6.proftext %t.7.proftext Index: llvm/tools/llvm-profdata/llvm-profdata.cpp =================================================================== --- llvm/tools/llvm-profdata/llvm-profdata.cpp +++ llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -448,7 +448,7 @@ ProfileFormat OutputFormat, MemoryBuffer *Buffer, sampleprof::ProfileSymbolList &WriterList, - bool CompressAllSections) { + bool CompressAllSections, bool UseMD5) { populateProfileSymbolList(Buffer,
WriterList); if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary) warn("Profile Symbol list is not empty but the output format is not " @@ -465,14 +465,22 @@ ExtBinaryWriter->setToCompressAllSections(); } } + if (UseMD5) { + if (OutputFormat != PF_Ext_Binary) { + warn("-use-md5 is ignored. Specify -extbinary to enable it"); + } else { + auto ExtBinaryWriter = + static_cast(&Writer); + ExtBinaryWriter->setUseMD5(); + } + } } -static void mergeSampleProfile(const WeightedFileVector &Inputs, - SymbolRemapper *Remapper, - StringRef OutputFilename, - ProfileFormat OutputFormat, - StringRef ProfileSymbolListFile, - bool CompressAllSections, FailureMode FailMode) { +static void +mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, + StringRef OutputFilename, ProfileFormat OutputFormat, + StringRef ProfileSymbolListFile, bool CompressAllSections, + bool UseMD5, FailureMode FailMode) { using namespace sampleprof; StringMap ProfileMap; SmallVector, 5> Readers; @@ -529,7 +537,7 @@ // Make sure Buffer lives as long as WriterList. 
auto Buffer = getInputFileBuf(ProfileSymbolListFile); handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList, - CompressAllSections); + CompressAllSections, UseMD5); Writer->write(ProfileMap); } @@ -657,6 +665,10 @@ "compress-all-sections", cl::init(false), cl::Hidden, cl::desc("Compress all sections when writing the profile (only " "meaningful for -extbinary)")); + cl::opt UseMD5( + "use-md5", cl::init(false), cl::Hidden, + cl::desc("Choose to use MD5 to represent string in name table (only " + "meaningful for -extbinary)")); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); @@ -691,7 +703,7 @@ else mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename, OutputFormat, ProfileSymbolListFile, CompressAllSections, - FailureMode); + UseMD5, FailureMode); return 0; } Index: llvm/unittests/ProfileData/SampleProfTest.cpp =================================================================== --- llvm/unittests/ProfileData/SampleProfTest.cpp +++ llvm/unittests/ProfileData/SampleProfTest.cpp @@ -76,11 +76,13 @@ OS->close(); } - void testRoundTrip(SampleProfileFormat Format, bool Remap) { + void testRoundTrip(SampleProfileFormat Format, bool Remap, bool UseMD5) { SmallVector ProfilePath; ASSERT_TRUE(NoError(llvm::sys::fs::createTemporaryFile("profile", "", ProfilePath))); StringRef Profile(ProfilePath.data(), ProfilePath.size()); createWriter(Format, Profile); + if (Format == SampleProfileFormat::SPF_Ext_Binary && UseMD5) + static_cast(Writer.get())->setUseMD5(); StringRef FooName("_Z3fooi"); FunctionSamples FooSamples; @@ -167,7 +169,7 @@ FunctionSamples *ReadFooSamples = Reader->getSamplesFor(FooName); ASSERT_TRUE(ReadFooSamples != nullptr); - if (Format != SampleProfileFormat::SPF_Compact_Binary) { + if (!UseMD5) { ASSERT_EQ("_Z3fooi", ReadFooSamples->getName()); } ASSERT_EQ(7711u, ReadFooSamples->getTotalSamples()); @@ -175,7 +177,7 @@ FunctionSamples *ReadBarSamples = Reader->getSamplesFor(BarName); ASSERT_TRUE(ReadBarSamples 
!= nullptr); - if (Format != SampleProfileFormat::SPF_Compact_Binary) { + if (!UseMD5) { ASSERT_EQ("_Z3bari", ReadBarSamples->getName()); } ASSERT_EQ(20301u, ReadBarSamples->getTotalSamples()); @@ -204,10 +206,10 @@ std::string MconstructGUID; StringRef MconstructRep = - getRepInFormat(MconstructName, Format, MconstructGUID); + getRepInFormat(MconstructName, UseMD5, MconstructGUID); std::string StringviewGUID; StringRef StringviewRep = - getRepInFormat(StringviewName, Format, StringviewGUID); + getRepInFormat(StringviewName, UseMD5, StringviewGUID); ASSERT_EQ(1000u, CTMap.get()[MconstructRep]); ASSERT_EQ(437u, CTMap.get()[StringviewRep]); @@ -332,31 +334,35 @@ }; TEST_F(SampleProfTest, roundtrip_text_profile) { - testRoundTrip(SampleProfileFormat::SPF_Text, false); + testRoundTrip(SampleProfileFormat::SPF_Text, false, false); } TEST_F(SampleProfTest, roundtrip_raw_binary_profile) { - testRoundTrip(SampleProfileFormat::SPF_Binary, false); + testRoundTrip(SampleProfileFormat::SPF_Binary, false, false); } TEST_F(SampleProfTest, roundtrip_compact_binary_profile) { - testRoundTrip(SampleProfileFormat::SPF_Compact_Binary, false); + testRoundTrip(SampleProfileFormat::SPF_Compact_Binary, false, true); } TEST_F(SampleProfTest, roundtrip_ext_binary_profile) { - testRoundTrip(SampleProfileFormat::SPF_Ext_Binary, false); + testRoundTrip(SampleProfileFormat::SPF_Ext_Binary, false, false); +} + +TEST_F(SampleProfTest, roundtrip_md5_ext_binary_profile) { + testRoundTrip(SampleProfileFormat::SPF_Ext_Binary, false, true); } TEST_F(SampleProfTest, remap_text_profile) { - testRoundTrip(SampleProfileFormat::SPF_Text, true); + testRoundTrip(SampleProfileFormat::SPF_Text, true, false); } TEST_F(SampleProfTest, remap_raw_binary_profile) { - testRoundTrip(SampleProfileFormat::SPF_Binary, true); + testRoundTrip(SampleProfileFormat::SPF_Binary, true, false); } TEST_F(SampleProfTest, remap_ext_binary_profile) { - testRoundTrip(SampleProfileFormat::SPF_Ext_Binary, true); + 
testRoundTrip(SampleProfileFormat::SPF_Ext_Binary, true, false); } TEST_F(SampleProfTest, sample_overflow_saturation) {