Index: include/llvm/IR/ModuleSummaryIndex.h =================================================================== --- include/llvm/IR/ModuleSummaryIndex.h +++ include/llvm/IR/ModuleSummaryIndex.h @@ -837,6 +837,11 @@ // some were not. Set when the combined index is created during the thin link. bool PartiallySplitLTOUnits = false; + // True if compiled with a flattened sample PGO profile. This is set in the + // per-module index during module summary building. For the combined index + // it is the OR of this flag across all input per-module indexes. + bool FlattenedProfileUsed = false; + std::set CfiFunctionDefs; std::set CfiFunctionDecls; @@ -856,9 +861,10 @@ public: // See HaveGVs variable comment. - ModuleSummaryIndex(bool HaveGVs, bool EnableSplitLTOUnit = false) - : HaveGVs(HaveGVs), EnableSplitLTOUnit(EnableSplitLTOUnit), Saver(Alloc) { - } + ModuleSummaryIndex(bool HaveGVs, bool EnableSplitLTOUnit = false, + bool FlattenedProfileUsed = false) + : HaveGVs(HaveGVs), EnableSplitLTOUnit(EnableSplitLTOUnit), + FlattenedProfileUsed(FlattenedProfileUsed), Saver(Alloc) {} bool haveGVs() const { return HaveGVs; } @@ -954,6 +960,9 @@ bool partiallySplitLTOUnits() const { return PartiallySplitLTOUnits; } void setPartiallySplitLTOUnits() { PartiallySplitLTOUnits = true; } + bool flattenedProfileUsed() const { return FlattenedProfileUsed; } + void setFlattenedProfileUsed() { FlattenedProfileUsed = true; } + bool isGlobalValueLive(const GlobalValueSummary *GVS) const { return !WithGlobalValueDeadStripping || GVS->isLive(); } Index: lib/Analysis/ModuleSummaryAnalysis.cpp =================================================================== --- lib/Analysis/ModuleSummaryAnalysis.cpp +++ lib/Analysis/ModuleSummaryAnalysis.cpp @@ -75,6 +75,8 @@ cl::value_desc("filename"), cl::desc("File to emit dot graph of new summary into.")); +extern cl::opt FlattenedProfileUsed; + // Walk through the operands of a given User via worklist iteration and populate // the set of GlobalValue references 
encountered. Invoked either on an // Instruction or a GlobalVariable (which walks its initializer). @@ -465,7 +467,8 @@ if (auto *MD = mdconst::extract_or_null( M.getModuleFlag("EnableSplitLTOUnit"))) EnableSplitLTOUnit = MD->getZExtValue(); - ModuleSummaryIndex Index(/*HaveGVs=*/true, EnableSplitLTOUnit); + ModuleSummaryIndex Index(/*HaveGVs=*/true, EnableSplitLTOUnit, + FlattenedProfileUsed); // Identify the local values in the llvm.used and llvm.compiler.used sets, // which should not be exported as they would then require renaming and Index: lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- lib/Bitcode/Reader/BitcodeReader.cpp +++ lib/Bitcode/Reader/BitcodeReader.cpp @@ -5367,7 +5367,7 @@ case bitc::FS_FLAGS: { // [flags] uint64_t Flags = Record[0]; // Scan flags. - assert(Flags <= 0x1f && "Unexpected bits in flag"); + assert(Flags <= 0x3f && "Unexpected bits in flag"); // 1 bit: WithGlobalValueDeadStripping flag. // Set on combined index only. @@ -5390,6 +5390,11 @@ // Set on combined index only. if (Flags & 0x10) TheIndex.setPartiallySplitLTOUnits(); + // 1 bit: FlattenedProfileUsed flag. + // Set on per module indexes. This effectively ORs the flag across all + // modules being linked. + if (Flags & 0x20) + TheIndex.setFlattenedProfileUsed(); break; } case bitc::FS_VALUE_GUID: { // [valueid, refguid] @@ -6032,7 +6037,7 @@ case bitc::FS_FLAGS: { // [flags] uint64_t Flags = Record[0]; // Scan flags. - assert(Flags <= 0x1f && "Unexpected bits in flag"); + assert(Flags <= 0x3f && "Unexpected bits in flag"); return Flags & 0x8; } Index: lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- lib/Bitcode/Writer/BitcodeWriter.cpp +++ lib/Bitcode/Writer/BitcodeWriter.cpp @@ -3656,9 +3656,11 @@ // Write the index flags. uint64_t Flags = 0; - // Bits 1-3 are set only in the combined index, skip them. 
+ // Flags 0x1, 0x2, 0x4 and 0x10 are set only in the combined index; skip them. if (Index->enableSplitLTOUnit()) Flags |= 0x8; + if (Index->flattenedProfileUsed()) + Flags |= 0x20; Stream.EmitRecord(bitc::FS_FLAGS, ArrayRef{Flags}); if (Index->begin() == Index->end()) { @@ -3781,6 +3783,8 @@ Flags |= 0x8; if (Index.partiallySplitLTOUnits()) Flags |= 0x10; + if (Index.flattenedProfileUsed()) + Flags |= 0x20; Stream.EmitRecord(bitc::FS_FLAGS, ArrayRef{Flags}); for (const auto &GVI : valueIds()) { Index: lib/ProfileData/SampleProf.cpp =================================================================== --- lib/ProfileData/SampleProf.cpp +++ lib/ProfileData/SampleProf.cpp @@ -25,6 +25,11 @@ using namespace llvm; using namespace sampleprof; +cl::opt FlattenedProfileUsed( + "flattened-profile-used", cl::init(false), cl::Hidden, + cl::desc("Indicate the sample profile being used is flattened, i.e., " + "no inline hierachy exists in the profile. ")); + namespace llvm { namespace sampleprof { SampleProfileFormat FunctionSamples::Format; Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -149,10 +149,7 @@ EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)")); -cl::opt FlattenedProfileUsed( - "flattened-profile-used", cl::init(false), cl::Hidden, - cl::desc("Indicate the sample profile being used is flattened, i.e., " - "no inline hierachy exists in the profile. 
")); +extern cl::opt FlattenedProfileUsed; PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; Index: test/Bitcode/thinlto-flattened-flag.ll =================================================================== --- /dev/null +++ test/Bitcode/thinlto-flattened-flag.ll @@ -0,0 +1,35 @@ +; REQUIRES: x86-registered-target + +; First generate four ThinLTO versions of this file, two without flattened +; profiles and two with. Confirm that the flattened profile flag is set +; appropriately. It doesn't matter for the purposes of the summary analysis +; whether there actually is a profile passed in. +; RUN: opt -module-summary %s -o %tNF1.o +; RUN: llvm-bcanalyzer -dump %tNF1.o | FileCheck %s --check-prefix=NOFLAT +; RUN: opt -module-summary %s -o %tNF2.o +; RUN: llvm-bcanalyzer -dump %tNF2.o | FileCheck %s --check-prefix=NOFLAT +; RUN: opt -module-summary -flattened-profile-used %s -o %tF1.o +; RUN: llvm-bcanalyzer -dump %tF1.o | FileCheck %s --check-prefix=FLAT +; RUN: opt -module-summary -flattened-profile-used %s -o %tF2.o +; RUN: llvm-bcanalyzer -dump %tF2.o | FileCheck %s --check-prefix=FLAT + +; NOFLAT: +; FLAT: + +; Next check that the flag is propagated onto the combined index as expected. +; The result should be an OR of the input flags. + +; RUN: llvm-lto2 run %tNF1.o %tNF2.o -o %t.out -save-temps +; RUN: llvm-bcanalyzer -dump %t.out.index.bc | FileCheck %s --check-prefix=NOFLAT + +; RUN: llvm-lto2 run %tF1.o %tF2.o -o %t.out -save-temps +; RUN: llvm-bcanalyzer -dump %t.out.index.bc | FileCheck %s --check-prefix=FLAT + +; RUN: llvm-lto2 run %tNF1.o %tF2.o -o %t.out -save-temps +; RUN: llvm-bcanalyzer -dump %t.out.index.bc | FileCheck %s --check-prefix=FLAT + +; RUN: llvm-lto2 run %tF1.o %tNF2.o -o %t.out -save-temps +; RUN: llvm-bcanalyzer -dump %t.out.index.bc | FileCheck %s --check-prefix=FLAT + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu"