Index: lib/Passes/PassBuilder.cpp
===================================================================
--- lib/Passes/PassBuilder.cpp
+++ lib/Passes/PassBuilder.cpp
@@ -207,6 +207,8 @@
 
 extern cl::opt<bool> EnableHotColdSplit;
 
+extern cl::opt<bool> FlattenedProfileUsed;
+
 static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) {
   switch (Level) {
   case PassBuilder::O0:
@@ -611,9 +613,13 @@
   if (PGOOpt && !PGOOpt->SampleProfileFile.empty()) {
     // Annotate sample profile right after early FPM to ensure freshness of
     // the debug info.
-    MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile,
-                                        PGOOpt->ProfileRemappingFile,
-                                        Phase == ThinLTOPhase::PreLink));
+    // In ThinLTO mode, when flattened profile is used, all the available
+    // profile information will be annotated in PreLink phase so there is
+    // no need to load the profile again in PostLink.
+    if (!FlattenedProfileUsed || Phase != ThinLTOPhase::PostLink)
+      MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile,
+                                          PGOOpt->ProfileRemappingFile,
+                                          Phase == ThinLTOPhase::PreLink));
     // Do not invoke ICP in the ThinLTOPrelink phase as it makes it hard
     // for the profile annotation to be accurate in the ThinLTO backend.
     if (Phase != ThinLTOPhase::PreLink)
Index: lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -160,6 +160,11 @@
     EnableCHR("enable-chr", cl::init(true), cl::Hidden,
               cl::desc("Enable control height reduction optimization (CHR)"));
 
+cl::opt<bool> FlattenedProfileUsed(
+    "flattened-profile-used", cl::init(false), cl::Hidden,
+    cl::desc("Indicate the sample profile being used is flattened, i.e., "
+             "no inline hierarchy exists in the profile. "));
+
 PassManagerBuilder::PassManagerBuilder() {
     OptLevel = 2;
     SizeLevel = 0;
@@ -427,7 +432,15 @@
     legacy::PassManagerBase &MPM) {
   if (!PGOSampleUse.empty()) {
     MPM.add(createPruneEHPass());
-    MPM.add(createSampleProfileLoaderPass(PGOSampleUse));
+    // In ThinLTO mode, when flattened profile is used, all the available
+    // profile information will be annotated in PreLink phase so there is
+    // no need to load the profile again in PostLink.
+    // NOTE(review): this guard keys on PrepareForThinLTO, so the loader is
+    // also skipped for any non-prelink build when -flattened-profile-used is
+    // set, whereas the new pass manager skips it only in PostLink -- confirm
+    // that is intended.
+    if (!FlattenedProfileUsed || PrepareForThinLTO)
+      MPM.add(createSampleProfileLoaderPass(PGOSampleUse));
   }
 
   // Allow forcing function attributes as a debugging and tuning aid.
Index: test/Transforms/SampleProfile/Inputs/flattened.prof
===================================================================
--- test/Transforms/SampleProfile/Inputs/flattened.prof
+++ test/Transforms/SampleProfile/Inputs/flattened.prof
@@ -0,0 +1,2 @@
+foo:100:100
+ 1: 100
Index: test/Transforms/SampleProfile/flattened.ll
===================================================================
--- test/Transforms/SampleProfile/flattened.ll
+++ test/Transforms/SampleProfile/flattened.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -O2 -flattened-profile-used -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -enable-chr=false -prepare-for-thinlto=false -S | FileCheck %s
+; RUN: opt < %s -passes='thinlto<O2>' -pgo-kind=new-pm-pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -flattened-profile-used -S | FileCheck %s
+;
+; Check SampleProfileLoader doesn't read the profile in ThinLTO postlink stage
+; when flattened sample profile is used.
+; CHECK-NOT: !{!"ProfileFormat", !"SampleProfile"}
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define dso_local i32 @foo() local_unnamed_addr !dbg !7 {
+entry:
+  ret i32 -1, !dbg !9
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 8.0.0 (trunk 345241)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, nameTableKind: None)
+!1 = !DIFile(filename: "a.c", directory: "")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{!"clang version 8.0.0 (trunk 345241)"}
+!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !2)
+!8 = !DISubroutineType(types: !2)
+!9 = !DILocation(line: 2, column: 3, scope: !7)
Index: tools/opt/NewPMDriver.cpp
===================================================================
--- tools/opt/NewPMDriver.cpp
+++ tools/opt/NewPMDriver.cpp
@@ -102,18 +102,8 @@
     cl::Hidden);
 
 enum PGOKind { NoPGO, InstrGen, InstrUse, SampleUse };
-static cl::opt<PGOKind> PGOKindFlag(
-    "pgo-kind", cl::init(NoPGO), cl::Hidden,
-    cl::desc("The kind of profile guided optimization"),
-    cl::values(clEnumValN(NoPGO, "nopgo", "Do not use PGO."),
-               clEnumValN(InstrGen, "new-pm-pgo-instr-gen-pipeline",
-                          "Instrument the IR to generate profile."),
-               clEnumValN(InstrUse, "new-pm-pgo-instr-use-pipeline",
-                          "Use instrumented profile to guide PGO."),
-               clEnumValN(SampleUse, "new-pm-pgo-sample-use-pipeline",
-                          "Use sampled profile to guide PGO.")));
-static cl::opt<std::string> ProfileFile(
-    "profile-file", cl::desc("Path to the profile."), cl::Hidden);
+extern cl::opt<PGOKind> PGOKindFlag;
+extern cl::opt<std::string> ProfileFile;
 static cl::opt<std::string>
     ProfileRemappingFile("profile-remapping-file",
                          cl::desc("Path to the profile remapping file."),
Index: tools/opt/opt.cpp
===================================================================
--- tools/opt/opt.cpp
+++ tools/opt/opt.cpp
@@ -271,6 +271,29 @@
                     cl::desc("YAML output filename for pass remarks"),
                     cl::value_desc("filename"));
 
+enum PGOKind { NoPGO, InstrGen, InstrUse, SampleUse };
+cl::opt<PGOKind> PGOKindFlag(
+    "pgo-kind", cl::init(NoPGO), cl::Hidden,
+    cl::desc("The kind of profile guided optimization"),
+    cl::values(
+        clEnumValN(NoPGO, "nopgo", "Do not use PGO."),
+        clEnumValN(InstrGen, "pgo-instr-gen-pipeline",
+                   "Instrument the IR to generate profile."),
+        clEnumValN(InstrUse, "pgo-instr-use-pipeline",
+                   "Use instrumented profile to guide PGO."),
+        clEnumValN(SampleUse, "pgo-sample-use-pipeline",
+                   "Use sampled profile to guide PGO."),
+        clEnumValN(
+            InstrGen, "new-pm-pgo-instr-gen-pipeline",
+            "Instrument the IR to generate profile for new pass manager."),
+        clEnumValN(
+            InstrUse, "new-pm-pgo-instr-use-pipeline",
+            "Use instrumented profile to guide PGO for new pass manager."),
+        clEnumValN(SampleUse, "new-pm-pgo-sample-use-pipeline",
+                   "Use sampled profile to guide PGO for new pass manager.")));
+cl::opt<std::string> ProfileFile("profile-file",
+                                 cl::desc("Path to the profile."), cl::Hidden);
+
 class OptCustomPassManager : public legacy::PassManager {
   DebugifyStatsMap DIStatsMap;
 
@@ -365,6 +388,20 @@
   if (Coroutines)
     addCoroutinePassesToExtensionPoints(Builder);
 
+  switch (PGOKindFlag) {
+  case InstrGen:
+    Builder.PGOInstrGen = ProfileFile;
+    break;
+  case InstrUse:
+    Builder.PGOInstrUse = ProfileFile;
+    break;
+  case SampleUse:
+    Builder.PGOSampleUse = ProfileFile;
+    break;
+  default:
+    break;
+  }
+
   Builder.populateFunctionPassManager(FPM);
   Builder.populateModulePassManager(MPM);
 }