Index: llvm/include/llvm/ProfileData/SampleProf.h =================================================================== --- llvm/include/llvm/ProfileData/SampleProf.h +++ llvm/include/llvm/ProfileData/SampleProf.h @@ -164,7 +164,9 @@ // will be saved in the higher 32 bits. enum class SecCommonFlags : uint32_t { SecFlagInValid = 0, - SecFlagCompress = (1 << 0) + SecFlagCompress = (1 << 0), + // Indicate the section contains only profile without context. + SecFlagNoContext = (1 << 1) }; // Section specific flags are defined here. Index: llvm/include/llvm/ProfileData/SampleProfReader.h =================================================================== --- llvm/include/llvm/ProfileData/SampleProfReader.h +++ llvm/include/llvm/ProfileData/SampleProfReader.h @@ -451,6 +451,10 @@ /// Return whether names in the profile are all MD5 numbers. virtual bool useMD5() { return false; } + /// Don't read profile without context if the flag is set. This is only meaningful + /// for ExtBinary format. + virtual void setSkipNoContextProf(bool Skip) {} + SampleProfileReaderItaniumRemapper *getRemapper() { return Remapper.get(); } protected: @@ -666,6 +670,10 @@ /// the lifetime of MD5StringBuf is not shorter than that of NameTable. std::unique_ptr> MD5StringBuf; + /// If SkipNoContextProf is true, skip the sections with + /// SecFlagNoContext flag. 
+ bool SkipNoContextProf = false; + public: SampleProfileReaderExtBinaryBase(std::unique_ptr B, LLVMContext &C, SampleProfileFormat Format) @@ -689,6 +697,10 @@ virtual std::unique_ptr getProfileSymbolList() override { return std::move(ProfSymList); }; + + virtual void setSkipNoContextProf(bool Skip) override { + SkipNoContextProf = Skip; + } }; class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase { Index: llvm/include/llvm/ProfileData/SampleProfWriter.h =================================================================== --- llvm/include/llvm/ProfileData/SampleProfWriter.h +++ llvm/include/llvm/ProfileData/SampleProfWriter.h @@ -15,6 +15,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/ErrorOr.h" @@ -28,6 +29,15 @@ namespace llvm { namespace sampleprof { +enum SectionLayout { + DefaultLayout, + // The layout splits profile with context information from profile without + // context information. When ThinLTO is enabled, ThinLTO postlink phase only + // has to load profile with context information and can skip the other part. + CtxSplitLayout, + NumOfLayout, +}; + /// Sample-based profile writer. Base class. class SampleProfileWriter { public: @@ -60,6 +70,7 @@ virtual void setToCompressAllSections() {} virtual void setUseMD5() {} virtual void setPartialProfile() {} + virtual void resetSecLayout(SectionLayout SL) {} protected: SampleProfileWriter(std::unique_ptr &OS) @@ -144,6 +155,36 @@ using SampleProfileWriterBinary::SampleProfileWriterBinary; }; +const std::array, NumOfLayout> + ExtBinaryLayoutTable = { + // Note that SecFuncOffsetTable section is written after SecLBRProfile + // in the profile, but is put before SecLBRProfile in SectionHdrLayout. + // This is because sample reader follows the order in SectionHdrLayout + // to read each section.
To read function profiles on demand, sample + // reader needs to get the offset of each function profile first. + // + // DefaultLayout + SmallVector({{SecProfSummary}, + {SecNameTable}, + {SecFuncOffsetTable}, + {SecLBRProfile}, + {SecProfileSymbolList}, + {SecFuncMetadata}}), + // CtxSplitLayout + SmallVector({{SecProfSummary}, + {SecNameTable}, + // profile with context + // for next two sections + {SecFuncOffsetTable}, + {SecLBRProfile}, + // profile without context + // for next two sections + {SecFuncOffsetTable}, + {SecLBRProfile}, + {SecProfileSymbolList}, + {SecFuncMetadata}}), +}; + class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary { using SampleProfileWriterBinary::SampleProfileWriterBinary; public: @@ -174,6 +215,17 @@ ProfSymList = PSL; }; + virtual void resetSecLayout(SectionLayout SL) override { + verifySecLayout(SL); + // Make sure resetSecLayout is called before any flag setting. + for (auto &Entry : SectionHdrLayout) { + assert(Entry.Flags == 0 && + "resetSecLayout has to be called before any flag setting"); + } + SecLayout = SL; + SectionHdrLayout = ExtBinaryLayoutTable[SL]; + } + protected: uint64_t markSectionStart(SecType Type, uint32_t LayoutIdx); std::error_code addNewSection(SecType Sec, uint32_t LayoutIdx, @@ -185,11 +237,16 @@ addSecFlag(Entry, Flag); } } + template + void addSectionFlag(uint32_t SectionIdx, SecFlagType Flag) { + addSecFlag(SectionHdrLayout[SectionIdx], Flag); + } // placeholder for subclasses to dispatch their own section writers. virtual std::error_code writeCustomSection(SecType Type) = 0; + // Verify the SecLayout is supported by the format. + virtual void verifySecLayout(SectionLayout SL) = 0; - virtual void initSectionHdrLayout() = 0; // specify the order to write sections.
virtual std::error_code writeSections(const StringMap &ProfileMap) = 0; @@ -211,11 +268,13 @@ std::error_code writeFuncOffsetTable(); std::error_code writeProfileSymbolListSection(); + SectionLayout SecLayout = DefaultLayout; // Specifiy the order of sections in section header table. Note // the order of sections in SecHdrTable may be different that the // order in SectionHdrLayout. sample Reader will follow the order // in SectionHdrLayout to read each section. - SmallVector SectionHdrLayout; + SmallVector SectionHdrLayout = + ExtBinaryLayoutTable[DefaultLayout]; // Save the start of SecLBRProfile so we can compute the offset to the // start of SecLBRProfile for each Function's Profile and will keep it @@ -261,33 +320,25 @@ class SampleProfileWriterExtBinary : public SampleProfileWriterExtBinaryBase { public: SampleProfileWriterExtBinary(std::unique_ptr &OS) - : SampleProfileWriterExtBinaryBase(OS) { - initSectionHdrLayout(); - } + : SampleProfileWriterExtBinaryBase(OS) {} private: - virtual void initSectionHdrLayout() override { - // Note that SecFuncOffsetTable section is written after SecLBRProfile - // in the profile, but is put before SecLBRProfile in SectionHdrLayout. - // - // This is because sample reader follows the order of SectionHdrLayout to - // read each section, to read function profiles on demand sample reader - // need to get the offset of each function profile first. - // - // SecFuncOffsetTable section is written after SecLBRProfile in the - // profile because FuncOffsetTable needs to be populated while section - // SecLBRProfile is written. 
- SectionHdrLayout = { - {SecProfSummary, 0, 0, 0, 0}, {SecNameTable, 0, 0, 0, 0}, - {SecFuncOffsetTable, 0, 0, 0, 0}, {SecLBRProfile, 0, 0, 0, 0}, - {SecProfileSymbolList, 0, 0, 0, 0}, {SecFuncMetadata, 0, 0, 0, 0}}; - }; + std::error_code + writeDefaultLayout(const StringMap &ProfileMap); + std::error_code + writeCtxSplitLayout(const StringMap &ProfileMap); + virtual std::error_code writeSections(const StringMap &ProfileMap) override; virtual std::error_code writeCustomSection(SecType Type) override { return sampleprof_error::success; }; + + virtual void verifySecLayout(SectionLayout SL) override { + assert((SL == DefaultLayout || SL == CtxSplitLayout) && + "Unsupported layout"); + } }; // CompactBinary is a compact format of binary profile which both reduces Index: llvm/include/llvm/Transforms/IPO/SampleProfile.h =================================================================== --- llvm/include/llvm/Transforms/IPO/SampleProfile.h +++ llvm/include/llvm/Transforms/IPO/SampleProfile.h @@ -25,9 +25,11 @@ class SampleProfileLoaderPass : public PassInfoMixin { public: SampleProfileLoaderPass(std::string File = "", std::string RemappingFile = "", - bool IsThinLTOPreLink = false) + bool IsThinLTOPreLink = false, + bool IsThinLTOPostLink = false) : ProfileFileName(File), ProfileRemappingFileName(RemappingFile), - IsThinLTOPreLink(IsThinLTOPreLink) {} + IsThinLTOPreLink(IsThinLTOPreLink), + IsThinLTOPostLink(IsThinLTOPostLink) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); @@ -35,6 +37,7 @@ std::string ProfileFileName; std::string ProfileRemappingFileName; bool IsThinLTOPreLink; + bool IsThinLTOPostLink; }; } // end namespace llvm Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -1070,9 +1070,9 @@ if (LoadSampleProfile) { // Annotate sample profile right after early FPM to ensure freshness of // the debug info. 
- MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile, - PGOOpt->ProfileRemappingFile, - Phase == ThinLTOPhase::PreLink)); + MPM.addPass(SampleProfileLoaderPass( + PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, + Phase == ThinLTOPhase::PreLink, Phase == ThinLTOPhase::PostLink)); // Cache ProfileSummaryAnalysis once to avoid the potential need to insert // RequireAnalysisPass for PSI before subsequent non-module passes. MPM.addPass(RequireAnalysisPass()); @@ -1545,9 +1545,9 @@ if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) { // Load sample profile before running the LTO optimization pipeline. - MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile, - PGOOpt->ProfileRemappingFile, - false /* ThinLTOPhase::PreLink */)); + MPM.addPass(SampleProfileLoaderPass( + PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, + false /* ThinLTOPhase::PreLink */, false /* ThinLTOPhase::PostLink */)); // Cache ProfileSummaryAnalysis once to avoid the potential need to insert // RequireAnalysisPass for PSI before subsequent non-module passes. MPM.addPass(RequireAnalysisPass()); Index: llvm/lib/ProfileData/SampleProfReader.cpp =================================================================== --- llvm/lib/ProfileData/SampleProfReader.cpp +++ llvm/lib/ProfileData/SampleProfReader.cpp @@ -740,6 +740,11 @@ if (!Entry.Size) continue; + // Skip sections without context when SkipNoContextProf is true. 
+ if (SkipNoContextProf && + hasSecFlag(Entry, SecCommonFlags::SecFlagNoContext)) + continue; + const uint8_t *SecStart = BufStart + Entry.Offset; uint64_t SecSize = Entry.Size; @@ -986,6 +991,9 @@ else Flags.append("{"); + if (hasSecFlag(Entry, SecCommonFlags::SecFlagNoContext)) + Flags.append("nocontext,"); + switch (Entry.Type) { case SecNameTable: if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5)) Index: llvm/lib/ProfileData/SampleProfWriter.cpp =================================================================== --- llvm/lib/ProfileData/SampleProfWriter.cpp +++ llvm/lib/ProfileData/SampleProfWriter.cpp @@ -19,6 +19,7 @@ #include "llvm/ProfileData/SampleProfWriter.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/Compression.h" @@ -264,7 +265,7 @@ return sampleprof_error::success; } -std::error_code SampleProfileWriterExtBinary::writeSections( +std::error_code SampleProfileWriterExtBinary::writeDefaultLayout( const StringMap &ProfileMap) { // The const indices passed to writeOneSection below are specifying the // positions of the sections in SectionHdrLayout. 
Look at @@ -285,6 +286,61 @@ return sampleprof_error::success; } +static void +splitProfileMapToTwo(const StringMap &ProfileMap, + StringMap &ContextProfileMap, + StringMap &NoContextProfileMap) { + for (const auto &I : ProfileMap) { + if (I.second.getCallsiteSamples().size()) + ContextProfileMap.insert({I.first(), I.second}); + else + NoContextProfileMap.insert({I.first(), I.second}); + } +} + +std::error_code SampleProfileWriterExtBinary::writeCtxSplitLayout( + const StringMap &ProfileMap) { + StringMap ContextProfileMap, NoContextProfileMap; + splitProfileMapToTwo(ProfileMap, ContextProfileMap, NoContextProfileMap); + + if (auto EC = writeOneSection(SecProfSummary, 0, ProfileMap)) + return EC; + if (auto EC = writeOneSection(SecNameTable, 1, ProfileMap)) + return EC; + if (auto EC = writeOneSection(SecLBRProfile, 3, ContextProfileMap)) + return EC; + if (auto EC = writeOneSection(SecFuncOffsetTable, 2, ContextProfileMap)) + return EC; + // Mark the section to have no context. Note section flag needs to be set + // before writing the section. + addSectionFlag(5, SecCommonFlags::SecFlagNoContext); + if (auto EC = writeOneSection(SecLBRProfile, 5, NoContextProfileMap)) + return EC; + // Mark the section to have no context. Note section flag needs to be set + // before writing the section. 
+ addSectionFlag(4, SecCommonFlags::SecFlagNoContext); + if (auto EC = writeOneSection(SecFuncOffsetTable, 4, NoContextProfileMap)) + return EC; + if (auto EC = writeOneSection(SecProfileSymbolList, 6, ProfileMap)) + return EC; + if (auto EC = writeOneSection(SecFuncMetadata, 7, ProfileMap)) + return EC; + + return sampleprof_error::success; +} + +std::error_code SampleProfileWriterExtBinary::writeSections( + const StringMap &ProfileMap) { + std::error_code EC; + if (SecLayout == DefaultLayout) + EC = writeDefaultLayout(ProfileMap); + else if (SecLayout == CtxSplitLayout) + EC = writeCtxSplitLayout(ProfileMap); + else + llvm_unreachable("Unsupported layout"); + return EC; +} + std::error_code SampleProfileWriterCompactBinary::write( const StringMap &ProfileMap) { if (std::error_code EC = SampleProfileWriter::write(ProfileMap)) Index: llvm/lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- llvm/lib/Transforms/IPO/SampleProfile.cpp +++ llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -322,6 +322,7 @@ public: SampleProfileLoader( StringRef Name, StringRef RemapName, bool IsThinLTOPreLink, + bool IsThinLTOPostLink, std::function GetAssumptionCache, std::function GetTargetTransformInfo, std::function GetTLI) @@ -329,7 +330,8 @@ GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)), CoverageTracker(*this), Filename(std::string(Name)), RemappingFilename(std::string(RemapName)), - IsThinLTOPreLink(IsThinLTOPreLink) {} + IsThinLTOPreLink(IsThinLTOPreLink), + IsThinLTOPostLink(IsThinLTOPostLink) {} bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr); bool runOnModule(Module &M, ModuleAnalysisManager *AM, @@ -453,6 +455,11 @@ /// In this phase, in annotation, we should not promote indirect calls. /// Instead, we will mark GUIDs that needs to be annotated to the function. bool IsThinLTOPreLink; + /// Flag indicating if the pass is invoked in ThinLTO postlink phase.
+ /// + /// If the function profiles with and without context are split, in ThinLTO + /// postlink phase, only profiles with context will be read. + bool IsThinLTOPostLink; /// Profile Summary Info computed from sample profile. ProfileSummaryInfo *PSI = nullptr; @@ -506,9 +513,11 @@ static char ID; SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile, - bool IsThinLTOPreLink = false) + bool IsThinLTOPreLink = false, + bool IsThinLTOPostLink = false) : ModulePass(ID), SampleLoader( Name, SampleProfileRemappingFile, IsThinLTOPreLink, + IsThinLTOPostLink, [&](Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }, @@ -1945,6 +1954,7 @@ return false; } Reader = std::move(ReaderOrErr.get()); + Reader->setSkipNoContextProf(IsThinLTOPostLink); Reader->collectFuncsFrom(M); ProfileIsValid = (Reader->read() == sampleprof_error::success); PSL = Reader->getProfileSymbolList(); @@ -2110,7 +2120,10 @@ initialEntryCount = -1; } - F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real)); + // Initialize entry count when the function has no existing entry + // count value. + if (!F.getEntryCount().hasValue()) + F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real)); std::unique_ptr OwnedORE; if (AM) { auto &FAM = @@ -2151,7 +2164,7 @@ ProfileFileName.empty() ? SampleProfileFile : ProfileFileName, ProfileRemappingFileName.empty() ?
SampleProfileRemappingFile : ProfileRemappingFileName, - IsThinLTOPreLink, GetAssumptionCache, GetTTI, GetTLI); + IsThinLTOPreLink, IsThinLTOPostLink, GetAssumptionCache, GetTTI, GetTLI); if (!SampleLoader.doInitialization(M, &FAM)) return PreservedAnalyses::all(); Index: llvm/test/Transforms/SampleProfile/ctxsplit.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SampleProfile/ctxsplit.ll @@ -0,0 +1,59 @@ +; Check the nonflattened part of the ctxsplit profile will be read in thinlto +; postlink phase while flattened part of the ctxsplit profile will not be read. +; RUN: opt < %s -passes='thinlto' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/ctxsplit.extbinary.afdo -S | FileCheck %s --check-prefix=POSTLINK +; +; Check both the flattened and nonflattened parts of the ctxsplit profile will +; be read in thinlto prelink phase. +; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/ctxsplit.extbinary.afdo -S | FileCheck %s --check-prefix=PRELINK +; +; Check both the flattened and nonflattened parts of the ctxsplit profile will +; be read in non-thinlto mode. 
+; RUN: opt < %s -passes='default' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/ctxsplit.extbinary.afdo -S | FileCheck %s --check-prefix=NOTHINLTO + +; POSTLINK: define dso_local i32 @goo() {{.*}} !prof ![[ENTRY1:[0-9]+]] { +; POSTLINK: define dso_local i32 @foo() {{.*}} !prof ![[ENTRY2:[0-9]+]] { +; POSTLINK: ![[ENTRY1]] = !{!"function_entry_count", i64 1001} +; POSTLINK: ![[ENTRY2]] = !{!"function_entry_count", i64 -1} +; PRELINK: define dso_local i32 @goo() {{.*}} !prof ![[ENTRY1:[0-9]+]] { +; PRELINK: define dso_local i32 @foo() {{.*}} !prof ![[ENTRY2:[0-9]+]] { +; PRELINK: ![[ENTRY1]] = !{!"function_entry_count", i64 1001} +; PRELINK: ![[ENTRY2]] = !{!"function_entry_count", i64 3001} +; NOTHINLTO: define dso_local i32 @goo() {{.*}} !prof ![[ENTRY1:[0-9]+]] { +; NOTHINLTO: define dso_local i32 @foo() {{.*}} !prof ![[ENTRY2:[0-9]+]] { +; NOTHINLTO: ![[ENTRY1]] = !{!"function_entry_count", i64 1001} +; NOTHINLTO: ![[ENTRY2]] = !{!"function_entry_count", i64 3001} + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: norecurse nounwind readnone uwtable +define dso_local i32 @goo() #0 !dbg !10 { +entry: + ret i32 -1, !dbg !11 +} + +; Function Attrs: norecurse nounwind readnone uwtable +define dso_local i32 @foo() #0 !dbg !7 { +entry: + ret i32 -1, !dbg !9 +} + +attributes #0 = { "use-sample-profile" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 8.0.0 (trunk 345241)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "a.c", directory: "") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 8.0.0 (trunk 345241)"} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 
1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !2) +!9 = !DILocation(line: 2, column: 3, scope: !7) +!10 = distinct !DISubprogram(name: "goo", scope: !1, file: !1, line: 8, type: !8, isLocal: false, isDefinition: true, scopeLine: 8, isOptimized: true, unit: !0, retainedNodes: !2) +!11 = !DILocation(line: 10, column: 3, scope: !10) +