Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -211,6 +211,8 @@ extern cl::opt EnableHotColdSplit; +extern cl::opt FlattenedProfileUsed; + static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) { switch (Level) { case PassBuilder::O0: @@ -615,9 +617,13 @@ if (PGOOpt && !PGOOpt->SampleProfileFile.empty()) { // Annotate sample profile right after early FPM to ensure freshness of // the debug info. - MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile, - PGOOpt->ProfileRemappingFile, - Phase == ThinLTOPhase::PreLink)); + // In ThinLTO mode, when flattened profile is used, all the available + // profile information will be annotated in PreLink phase so there is + // no need to load the profile again in PostLink. + if (!(FlattenedProfileUsed && Phase == ThinLTOPhase::PostLink)) + MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile, + PGOOpt->ProfileRemappingFile, + Phase == ThinLTOPhase::PreLink)); // Do not invoke ICP in the ThinLTOPrelink phase as it makes it hard // for the profile annotation to be accurate in the ThinLTO backend. if (Phase != ThinLTOPhase::PreLink) Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -104,6 +104,10 @@ EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden, cl::desc("Enable preparation for ThinLTO.")); +static cl::opt + EnablePerformThinLTO("perform-thinlto", cl::init(false), cl::Hidden, + cl::desc("Enable performing ThinLTO.")); + cl::opt EnableHotColdSplit("hot-cold-split", cl::init(false), cl::Hidden, cl::desc("Enable hot-cold splitting pass")); @@ -146,6 +150,11 @@ EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)")); +cl::opt FlattenedProfileUsed( + "flattened-profile-used", cl::init(false), cl::Hidden, + cl::desc("Indicate the sample profile being used is flattened, i.e., " + "no inline hierachy exists in the profile. ")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -166,7 +175,7 @@ PGOInstrUse = ""; PGOSampleUse = ""; PrepareForThinLTO = EnablePrepareForThinLTO; - PerformThinLTO = false; + PerformThinLTO = EnablePerformThinLTO; DivergentTarget = false; } @@ -414,7 +423,11 @@ legacy::PassManagerBase &MPM) { if (!PGOSampleUse.empty()) { MPM.add(createPruneEHPass()); - MPM.add(createSampleProfileLoaderPass(PGOSampleUse)); + // In ThinLTO mode, when flattened profile is used, all the available + // profile information will be annotated in PreLink phase so there is + // no need to load the profile again in PostLink. + if (!(FlattenedProfileUsed && PerformThinLTO)) + MPM.add(createSampleProfileLoaderPass(PGOSampleUse)); } // Allow forcing function attributes as a debugging and tuning aid. Index: test/Transforms/SampleProfile/Inputs/flattened.prof =================================================================== --- test/Transforms/SampleProfile/Inputs/flattened.prof +++ test/Transforms/SampleProfile/Inputs/flattened.prof @@ -0,0 +1,2 @@ +foo:100:100 + 1: 100 Index: test/Transforms/SampleProfile/flattened.ll =================================================================== --- test/Transforms/SampleProfile/flattened.ll +++ test/Transforms/SampleProfile/flattened.ll @@ -0,0 +1,39 @@ +; Check flattened profile will not be read in thinlto postlink. +; RUN: opt < %s -O2 -flattened-profile-used -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -enable-chr=false -perform-thinlto=true -S | FileCheck %s +; RUN: opt < %s -passes='thinlto' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -flattened-profile-used -S | FileCheck %s +; +; Check flattened profile will be read in thinlto prelink. +; RUN: opt < %s -O2 -flattened-profile-used -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -enable-chr=false -prepare-for-thinlto=true -S | FileCheck %s --check-prefix=PRELINK +; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -flattened-profile-used -S | FileCheck %s --check-prefix=PRELINK +; +; Check flattened profile will be read in non-thinlto mode. +; RUN: opt < %s -O2 -flattened-profile-used -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -enable-chr=false -S | FileCheck %s --check-prefix=NOTHINLTO +; RUN: opt < %s -passes='default' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -flattened-profile-used -S | FileCheck %s --check-prefix=NOTHINLTO +; +; CHECK-NOT: !{!"ProfileFormat", !"SampleProfile"} +; PRELINK: !{!"ProfileFormat", !"SampleProfile"} +; NOTHINLTO: !{!"ProfileFormat", !"SampleProfile"} + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: norecurse nounwind readnone uwtable +define dso_local i32 @foo() local_unnamed_addr !dbg !7 { +entry: + ret i32 -1, !dbg !9 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 8.0.0 (trunk 345241)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "a.c", directory: "") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 8.0.0 (trunk 345241)"} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !2) +!9 = !DILocation(line: 2, column: 3, scope: !7)