Index: llvm/include/llvm/ProfileData/SampleProf.h =================================================================== --- llvm/include/llvm/ProfileData/SampleProf.h +++ llvm/include/llvm/ProfileData/SampleProf.h @@ -164,7 +164,9 @@ // will be saved in the higher 32 bits. enum class SecCommonFlags : uint32_t { SecFlagInValid = 0, - SecFlagCompress = (1 << 0) + SecFlagCompress = (1 << 0), + // Indicate the section contains only profile without context. + SecFlagFlat = (1 << 1) }; // Section specific flags are defined here. Index: llvm/include/llvm/ProfileData/SampleProfReader.h =================================================================== --- llvm/include/llvm/ProfileData/SampleProfReader.h +++ llvm/include/llvm/ProfileData/SampleProfReader.h @@ -451,6 +451,10 @@ /// Return whether names in the profile are all MD5 numbers. virtual bool useMD5() { return false; } + /// Don't read profile without context if the flag is set. This is only meaningful + /// for ExtBinary format. + virtual void setSkipFlatProf(bool Skip) {} + SampleProfileReaderItaniumRemapper *getRemapper() { return Remapper.get(); } protected: @@ -666,6 +670,10 @@ /// the lifetime of MD5StringBuf is not shorter than that of NameTable. std::unique_ptr> MD5StringBuf; + /// If SkipFlatProf is true, skip the sections with + /// SecFlagFlat flag. 
+ bool SkipFlatProf = false; + public: SampleProfileReaderExtBinaryBase(std::unique_ptr B, LLVMContext &C, SampleProfileFormat Format) @@ -689,6 +697,8 @@ virtual std::unique_ptr getProfileSymbolList() override { return std::move(ProfSymList); }; + + virtual void setSkipFlatProf(bool Skip) override { SkipFlatProf = Skip; } }; class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase { Index: llvm/include/llvm/ProfileData/SampleProfWriter.h =================================================================== --- llvm/include/llvm/ProfileData/SampleProfWriter.h +++ llvm/include/llvm/ProfileData/SampleProfWriter.h @@ -15,6 +15,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/ErrorOr.h" @@ -28,6 +29,15 @@ namespace llvm { namespace sampleprof { +enum SectionLayout { + DefaultLayout, + // The layout splits profile with context information from profile without + // context information. When ThinLTO is enabled, ThinLTO postlink phase only + // has to load profile with context information and can skip the other part. + CtxSplitLayout, + NumOfLayout, +}; + /// Sample-based profile writer. Base class. class SampleProfileWriter { public: @@ -60,6 +70,7 @@ virtual void setToCompressAllSections() {} virtual void setUseMD5() {} virtual void setPartialProfile() {} + virtual void resetSecLayout(SectionLayout SL) {} protected: SampleProfileWriter(std::unique_ptr &OS) @@ -144,6 +155,36 @@ using SampleProfileWriterBinary::SampleProfileWriterBinary; }; +const std::array, NumOfLayout> + ExtBinaryHdrLayoutTable = { + // Note that SecFuncOffsetTable section is written after SecLBRProfile + // in the profile, but is put before SecLBRProfile in SectionHdrLayout. + // This is because sample reader follows the order in SectionHdrLayout + // to read each section. 
To read function profiles on demand, sample + // reader needs to get the offset of each function profile first. + // + // DefaultLayout + SmallVector({{SecProfSummary}, + {SecNameTable}, + {SecFuncOffsetTable}, + {SecLBRProfile}, + {SecProfileSymbolList}, + {SecFuncMetadata}}), + // CtxSplitLayout + SmallVector({{SecProfSummary}, + {SecNameTable}, + // profile with context + // for next two sections + {SecFuncOffsetTable}, + {SecLBRProfile}, + // profile without context + // for next two sections + {SecFuncOffsetTable}, + {SecLBRProfile}, + {SecProfileSymbolList}, + {SecFuncMetadata}}), +}; + class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary { using SampleProfileWriterBinary::SampleProfileWriterBinary; public: @@ -174,6 +215,17 @@ ProfSymList = PSL; }; + virtual void resetSecLayout(SectionLayout SL) override { + verifySecLayout(SL); + // Make sure resetSecLayout is called before any flag setting. + for (auto &Entry : SectionHdrLayout) { + assert(Entry.Flags == 0 && + "resetSecLayout has to be called before any flag setting"); + } + SecLayout = SL; + SectionHdrLayout = ExtBinaryHdrLayoutTable[SL]; + } + protected: uint64_t markSectionStart(SecType Type, uint32_t LayoutIdx); std::error_code addNewSection(SecType Sec, uint32_t LayoutIdx, @@ -185,11 +237,16 @@ addSecFlag(Entry, Flag); } } + template + void addSectionFlag(uint32_t SectionIdx, SecFlagType Flag) { + addSecFlag(SectionHdrLayout[SectionIdx], Flag); + } // placeholder for subclasses to dispatch their own section writers. virtual std::error_code writeCustomSection(SecType Type) = 0; + // Verify the SecLayout is supported by the format. + virtual void verifySecLayout(SectionLayout SL) = 0; - virtual void initSectionHdrLayout() = 0; // specify the order to write sections. 
virtual std::error_code writeSections(const StringMap &ProfileMap) = 0; @@ -211,11 +268,13 @@ std::error_code writeFuncOffsetTable(); std::error_code writeProfileSymbolListSection(); + SectionLayout SecLayout = DefaultLayout; // Specifiy the order of sections in section header table. Note // the order of sections in SecHdrTable may be different that the // order in SectionHdrLayout. sample Reader will follow the order // in SectionHdrLayout to read each section. - SmallVector SectionHdrLayout; + SmallVector SectionHdrLayout = + ExtBinaryHdrLayoutTable[DefaultLayout]; // Save the start of SecLBRProfile so we can compute the offset to the // start of SecLBRProfile for each Function's Profile and will keep it @@ -261,33 +320,25 @@ class SampleProfileWriterExtBinary : public SampleProfileWriterExtBinaryBase { public: SampleProfileWriterExtBinary(std::unique_ptr &OS) - : SampleProfileWriterExtBinaryBase(OS) { - initSectionHdrLayout(); - } + : SampleProfileWriterExtBinaryBase(OS) {} private: - virtual void initSectionHdrLayout() override { - // Note that SecFuncOffsetTable section is written after SecLBRProfile - // in the profile, but is put before SecLBRProfile in SectionHdrLayout. - // - // This is because sample reader follows the order of SectionHdrLayout to - // read each section, to read function profiles on demand sample reader - // need to get the offset of each function profile first. - // - // SecFuncOffsetTable section is written after SecLBRProfile in the - // profile because FuncOffsetTable needs to be populated while section - // SecLBRProfile is written. 
- SectionHdrLayout = { - {SecProfSummary, 0, 0, 0, 0}, {SecNameTable, 0, 0, 0, 0}, - {SecFuncOffsetTable, 0, 0, 0, 0}, {SecLBRProfile, 0, 0, 0, 0}, - {SecProfileSymbolList, 0, 0, 0, 0}, {SecFuncMetadata, 0, 0, 0, 0}}; - }; + std::error_code + writeDefaultLayout(const StringMap &ProfileMap); + std::error_code + writeCtxSplitLayout(const StringMap &ProfileMap); + virtual std::error_code writeSections(const StringMap &ProfileMap) override; virtual std::error_code writeCustomSection(SecType Type) override { return sampleprof_error::success; }; + + virtual void verifySecLayout(SectionLayout SL) override { + assert((SL == DefaultLayout || SL == CtxSplitLayout) && + "Unsupported layout"); + } }; // CompactBinary is a compact format of binary profile which both reduces Index: llvm/lib/ProfileData/SampleProfReader.cpp =================================================================== --- llvm/lib/ProfileData/SampleProfReader.cpp +++ llvm/lib/ProfileData/SampleProfReader.cpp @@ -740,6 +740,10 @@ if (!Entry.Size) continue; + // Skip sections without context when SkipFlatProf is true. 
+ if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat)) + continue; + const uint8_t *SecStart = BufStart + Entry.Offset; uint64_t SecSize = Entry.Size; @@ -986,6 +990,9 @@ else Flags.append("{"); + if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat)) + Flags.append("flat,"); + switch (Entry.Type) { case SecNameTable: if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5)) Index: llvm/lib/ProfileData/SampleProfWriter.cpp =================================================================== --- llvm/lib/ProfileData/SampleProfWriter.cpp +++ llvm/lib/ProfileData/SampleProfWriter.cpp @@ -19,6 +19,7 @@ #include "llvm/ProfileData/SampleProfWriter.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/Compression.h" @@ -264,7 +265,7 @@ return sampleprof_error::success; } -std::error_code SampleProfileWriterExtBinary::writeSections( +std::error_code SampleProfileWriterExtBinary::writeDefaultLayout( const StringMap &ProfileMap) { // The const indices passed to writeOneSection below are specifying the // positions of the sections in SectionHdrLayout. 
Look at @@ -285,6 +286,61 @@ return sampleprof_error::success; } +static void +splitProfileMapToTwo(const StringMap &ProfileMap, + StringMap &ContextProfileMap, + StringMap &NoContextProfileMap) { + for (const auto &I : ProfileMap) { + if (I.second.getCallsiteSamples().size()) + ContextProfileMap.insert({I.first(), I.second}); + else + NoContextProfileMap.insert({I.first(), I.second}); + } +} + +std::error_code SampleProfileWriterExtBinary::writeCtxSplitLayout( + const StringMap &ProfileMap) { + StringMap ContextProfileMap, NoContextProfileMap; + splitProfileMapToTwo(ProfileMap, ContextProfileMap, NoContextProfileMap); + + if (auto EC = writeOneSection(SecProfSummary, 0, ProfileMap)) + return EC; + if (auto EC = writeOneSection(SecNameTable, 1, ProfileMap)) + return EC; + if (auto EC = writeOneSection(SecLBRProfile, 3, ContextProfileMap)) + return EC; + if (auto EC = writeOneSection(SecFuncOffsetTable, 2, ContextProfileMap)) + return EC; + // Mark the section to have no context. Note section flag needs to be set + // before writing the section. + addSectionFlag(5, SecCommonFlags::SecFlagFlat); + if (auto EC = writeOneSection(SecLBRProfile, 5, NoContextProfileMap)) + return EC; + // Mark the section to have no context. Note section flag needs to be set + // before writing the section. 
+ addSectionFlag(4, SecCommonFlags::SecFlagFlat); + if (auto EC = writeOneSection(SecFuncOffsetTable, 4, NoContextProfileMap)) + return EC; + if (auto EC = writeOneSection(SecProfileSymbolList, 6, ProfileMap)) + return EC; + if (auto EC = writeOneSection(SecFuncMetadata, 7, ProfileMap)) + return EC; + + return sampleprof_error::success; +} + +std::error_code SampleProfileWriterExtBinary::writeSections( + const StringMap &ProfileMap) { + std::error_code EC; + if (SecLayout == DefaultLayout) + EC = writeDefaultLayout(ProfileMap); + else if (SecLayout == CtxSplitLayout) + EC = writeCtxSplitLayout(ProfileMap); + else + llvm_unreachable("Unsupported layout"); + return EC; +} + std::error_code SampleProfileWriterCompactBinary::write( const StringMap &ProfileMap) { if (std::error_code EC = SampleProfileWriter::write(ProfileMap)) Index: llvm/lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- llvm/lib/Transforms/IPO/SampleProfile.cpp +++ llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1946,6 +1946,7 @@ return false; } Reader = std::move(ReaderOrErr.get()); + Reader->setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink); Reader->collectFuncsFrom(M); ProfileIsValid = (Reader->read() == sampleprof_error::success); PSL = Reader->getProfileSymbolList(); @@ -2111,7 +2112,10 @@ initialEntryCount = -1; } - F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real)); + // Initialize entry count when the function has no existing entry + // count value. 
+ if (!F.getEntryCount().hasValue()) + F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real)); std::unique_ptr OwnedORE; if (AM) { auto &FAM = Index: llvm/test/Transforms/SampleProfile/ctxsplit.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SampleProfile/ctxsplit.ll @@ -0,0 +1,59 @@ +; Check the nonflattened part of the ctxsplit profile will be read in thinlto +; postlink phase while flattened part of the ctxsplit profile will not be read. +; RUN: opt < %s -passes='thinlto' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/ctxsplit.extbinary.afdo -S | FileCheck %s --check-prefix=POSTLINK +; +; Check both the flattened and nonflattened parts of the ctxsplit profile will +; be read in thinlto prelink phase. +; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/ctxsplit.extbinary.afdo -S | FileCheck %s --check-prefix=PRELINK +; +; Check both the flattened and nonflattened parts of the ctxsplit profile will +; be read in non-thinlto mode. 
+; RUN: opt < %s -passes='default' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/ctxsplit.extbinary.afdo -S | FileCheck %s --check-prefix=NOTHINLTO + +; POSTLINK: define dso_local i32 @goo() {{.*}} !prof ![[ENTRY1:[0-9]+]] { +; POSTLINK: define dso_local i32 @foo() {{.*}} !prof ![[ENTRY2:[0-9]+]] { +; POSTLINK: ![[ENTRY1]] = !{!"function_entry_count", i64 1001} +; POSTLINK: ![[ENTRY2]] = !{!"function_entry_count", i64 -1} +; PRELINK: define dso_local i32 @goo() {{.*}} !prof ![[ENTRY1:[0-9]+]] { +; PRELINK: define dso_local i32 @foo() {{.*}} !prof ![[ENTRY2:[0-9]+]] { +; PRELINK: ![[ENTRY1]] = !{!"function_entry_count", i64 1001} +; PRELINK: ![[ENTRY2]] = !{!"function_entry_count", i64 3001} +; NOTHINLTO: define dso_local i32 @goo() {{.*}} !prof ![[ENTRY1:[0-9]+]] { +; NOTHINLTO: define dso_local i32 @foo() {{.*}} !prof ![[ENTRY2:[0-9]+]] { +; NOTHINLTO: ![[ENTRY1]] = !{!"function_entry_count", i64 1001} +; NOTHINLTO: ![[ENTRY2]] = !{!"function_entry_count", i64 3001} + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: norecurse nounwind readnone uwtable +define dso_local i32 @goo() #0 !dbg !10 { +entry: + ret i32 -1, !dbg !11 +} + +; Function Attrs: norecurse nounwind readnone uwtable +define dso_local i32 @foo() #0 !dbg !7 { +entry: + ret i32 -1, !dbg !9 +} + +attributes #0 = { "use-sample-profile" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 8.0.0 (trunk 345241)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "a.c", directory: "") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 8.0.0 (trunk 345241)"} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 
1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !2) +!9 = !DILocation(line: 2, column: 3, scope: !7) +!10 = distinct !DISubprogram(name: "goo", scope: !1, file: !1, line: 8, type: !8, isLocal: false, isDefinition: true, scopeLine: 8, isOptimized: true, unit: !0, retainedNodes: !2) +!11 = !DILocation(line: 10, column: 3, scope: !10) +