Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -540,8 +540,32 @@ EarlyFPM.addPass(SROA()); EarlyFPM.addPass(EarlyCSEPass()); EarlyFPM.addPass(LowerExpectIntrinsicPass()); + // In SamplePGO ThinLTO backend, we need instcombine before profile annotation + // to convert bitcast to direct calls so that they can be inlined during the + // profile annotation preparation step. + // More details about SamplePGO design can be found in: + // https://research.google.com/pubs/pub45290.html + // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured. + if (PGOOpt && !PGOOpt->SampleProfileFile.empty() && + Phase == ThinLTOPhase::PostLink) + EarlyFPM.addPass(InstCombinePass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM))); + if (PGOOpt && !PGOOpt->SampleProfileFile.empty()) { + // Annotate sample profile right after early FPM to ensure freshness of + // the debug info. + MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile)); + // Do not invoke ICP in the ThinLTOPrelink phase as it makes it hard + // for the profile annotation to be accurate in the ThinLTO backend. + if (Phase != ThinLTOPhase::PreLink) + // We perform early indirect call promotion here, before globalopt. + // This is important for the ThinLTO backend phase because otherwise + // imported available_externally functions look unreferenced and are + // removed. + MPM.addPass(PGOIndirectCallPromotion(Phase == ThinLTOPhase::PostLink, + true)); + } + // Interprocedural constant propagation now that basic cleanup has occured // and prior to optimizing globals. // FIXME: This position in the pipeline hasn't been carefully considered in @@ -768,13 +792,8 @@ // Force any function attributes we want the rest of the pipeline to observe. 
MPM.addPass(ForceFunctionAttrsPass()); - if (PGOOpt && PGOOpt->SamplePGOSupport) { + if (PGOOpt && PGOOpt->SamplePGOSupport) MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); - if (!PGOOpt->SampleProfileFile.empty()) { - MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile)); - MPM.addPass(PGOIndirectCallPromotion(false, true)); - } - } // Add the core simplification pipeline. MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::None, @@ -796,14 +815,8 @@ // Force any function attributes we want the rest of the pipeline to observe. MPM.addPass(ForceFunctionAttrsPass()); - // Invoke the SamplePGO annotation pass for the first time to annotate - // profile for functions in the current module to give ThinLink info - // about module grouping. - if (PGOOpt && PGOOpt->SamplePGOSupport) { + if (PGOOpt && PGOOpt->SamplePGOSupport) MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); - if (!PGOOpt->SampleProfileFile.empty()) - MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile)); - } // If we are planning to perform ThinLTO later, we don't bloat the code with // unrolling/vectorization/... now. Just simplify the module as much as we @@ -839,16 +852,14 @@ // Force any function attributes we want the rest of the pipeline to observe. MPM.addPass(ForceFunctionAttrsPass()); - // Invoke the SamplePGO annotation pass for the second time to annotate on - // functions imported from other modules. - if (PGOOpt && !PGOOpt->SampleProfileFile.empty()) - MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile)); - // During the ThinLTO backend phase we perform early indirect call promotion // here, before globalopt. Otherwise imported available_externally functions // look unreferenced and are removed. 
- MPM.addPass(PGOIndirectCallPromotion( - true /* InLTO */, PGOOpt && !PGOOpt->SampleProfileFile.empty())); + // FIXME: move this into buildModuleSimplificationPipeline to merge the logic + // with SamplePGO. + if (PGOOpt && !PGOOpt->ProfileUseFile.empty()) + MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, + false /* SamplePGO */)); // Add the core simplification pipeline. MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::PostLink, Index: test/Other/new-pm-pgo.ll =================================================================== --- test/Other/new-pm-pgo.ll +++ test/Other/new-pm-pgo.ll @@ -1,13 +1,27 @@ ; RUN: opt -debug-pass-manager -passes='default' -pgo-kind=new-pm-pgo-instr-gen-pipeline -profile-file='temp' %s 2>&1 |FileCheck %s --check-prefixes=GEN ; RUN: llvm-profdata merge %S/Inputs/new-pm-pgo.proftext -o %t.profdata ; RUN: opt -debug-pass-manager -passes='default' -pgo-kind=new-pm-pgo-instr-use-pipeline -profile-file='%t.profdata' %s 2>&1 |FileCheck %s --check-prefixes=USE -; RUN: opt -debug-pass-manager -passes='default' -pgo-kind=new-pm-pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-pgo.prof' %s 2>&1 |FileCheck %s --check-prefixes=SAMPLE_USE +; RUN: opt -debug-pass-manager -passes='default' -pgo-kind=new-pm-pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-pgo.prof' %s 2>&1 \ +; RUN: |FileCheck %s --check-prefixes=SAMPLE_USE,SAMPLE_USE_O +; RUN: opt -debug-pass-manager -passes='thinlto-pre-link' -pgo-kind=new-pm-pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-pgo.prof' %s 2>&1 \ +; RUN: |FileCheck %s --check-prefixes=SAMPLE_USE,SAMPLE_USE_PRE_LINK +; RUN: opt -debug-pass-manager -passes='thinlto' -pgo-kind=new-pm-pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-pgo.prof' %s 2>&1 \ +; RUN: |FileCheck %s --check-prefixes=SAMPLE_USE,SAMPLE_USE_POST_LINK ; RUN: opt -debug-pass-manager -passes='default' -new-pm-debug-info-for-profiling %s 2>&1 |FileCheck %s --check-prefixes=SAMPLE_GEN ; ; GEN: Running 
pass: PGOInstrumentationGen ; USE: Running pass: PGOInstrumentationUse -; SAMPLE_USE: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}> +; SAMPLE_USE_O: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}> +; SAMPLE_USE_PRE_LINK: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}> +; SAMPLE_USE: Running pass: SimplifyCFGPass +; SAMPLE_USE: Running pass: SROA +; SAMPLE_USE: Running pass: EarlyCSEPass +; SAMPLE_USE: Running pass: LowerExpectIntrinsicPass +; SAMPLE_USE_POST_LINK: Running pass: InstCombinePass ; SAMPLE_USE: Running pass: SampleProfileLoaderPass +; SAMPLE_USE_O: Running pass: PGOIndirectCallPromotion +; SAMPLE_USE_POST_LINK-NOT: Running pass: GlobalOptPass +; SAMPLE_USE_POST_LINK: Running pass: PGOIndirectCallPromotion ; SAMPLE_GEN: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}> define void @foo() { Index: test/Other/new-pm-thinlto-defaults.ll =================================================================== --- test/Other/new-pm-thinlto-defaults.ll +++ test/Other/new-pm-thinlto-defaults.ll @@ -53,15 +53,13 @@ ; CHECK-O-NEXT: Running pass: ForceFunctionAttrsPass ; CHECK-DIS-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}> ; CHECK-DIS-NEXT: Running analysis: InnerAnalysisManagerProxy -; CHECK-POSTLINK-O-NEXT: Running pass: PGOIndirectCallPromotion -; CHECK-POSTLINK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Function -; CHECK-POSTLINK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis ; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> ; CHECK-O-NEXT: Starting llvm::Module pass manager run. 
; CHECK-O-NEXT: Running pass: InferFunctionAttrsPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-PRELINK-O-NODIS-NEXT: Running analysis: InnerAnalysisManagerProxy +; CHECK-POSTLINK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Starting llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis @@ -80,6 +78,7 @@ ; CHECK-O-NEXT: Starting llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-PRELINK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis +; CHECK-POSTLINK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA