Index: lib/LTO/LTOBackend.cpp =================================================================== --- lib/LTO/LTOBackend.cpp +++ lib/LTO/LTOBackend.cpp @@ -131,9 +131,13 @@ Conf.CodeModel, Conf.CGOptLevel)); } -static void runNewPMPasses(Module &Mod, TargetMachine *TM, unsigned OptLevel, - bool IsThinLTO) { - PassBuilder PB(TM); +static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM, + unsigned OptLevel, bool IsThinLTO) { + Optional<PGOOptions> PGOOpt; + if (!Conf.SampleProfile.empty()) + PGOOpt = PGOOptions("", "", Conf.SampleProfile, false, true); + + PassBuilder PB(TM, PGOOpt); AAManager AA; // Parse a custom AA pipeline if asked to. @@ -262,7 +266,7 @@ runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline, Conf.DisableVerify); else if (Conf.UseNewPM) - runNewPMPasses(Mod, TM, Conf.OptLevel, IsThinLTO); + runNewPMPasses(Conf, Mod, TM, Conf.OptLevel, IsThinLTO); else runOldPMPasses(Conf, Mod, TM, IsThinLTO, ExportSummary, ImportSummary); return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod); Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -570,25 +570,6 @@ GlobalCleanupPM.addPass(SimplifyCFGPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM))); - // Add all the requested passes for PGO, if requested. - if (PGOOpt) { - if (!PGOOpt->ProfileGenFile.empty() || !PGOOpt->ProfileUseFile.empty()) - // Instrumentation based PGO (gen and use) - addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen, - PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile); - else if (!PGOOpt->SampleProfileFile.empty()) - // SamplePGO use - MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile)); - - // Indirect call promotion that promotes intra-module targes only. 
- // Do not enable it in PrepareForThinLTO phase during sample PGO because - // it changes IR to makes profile annotation in back compile inaccurate. - if ((!PrepareForThinLTO && !PGOOpt->SampleProfileFile.empty()) - || !PGOOpt->ProfileUseFile.empty()) - MPM.addPass(PGOIndirectCallPromotion( - false, PGOOpt && !PGOOpt->SampleProfileFile.empty())); - } - // Require the GlobalsAA analysis for the module so we can query it within // the CGSCC pipeline. MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>()); @@ -778,8 +759,18 @@ // Force any function attributes we want the rest of the pipeline to observe. MPM.addPass(ForceFunctionAttrsPass()); - if (PGOOpt && PGOOpt->SamplePGOSupport) - MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); + if (PGOOpt) { + if (PGOOpt->SamplePGOSupport) + MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); + if (!PGOOpt->ProfileGenFile.empty() || !PGOOpt->ProfileUseFile.empty()) + addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen, + PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile); + else if (!PGOOpt->SampleProfileFile.empty()) + MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile)); + if (!PGOOpt->ProfileUseFile.empty() || !PGOOpt->SampleProfileFile.empty()) + MPM.addPass(PGOIndirectCallPromotion( + false, !PGOOpt->SampleProfileFile.empty())); + } // Add the core simplification pipeline. MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging, @@ -801,9 +792,18 @@ // Force any function attributes we want the rest of the pipeline to observe. 
MPM.addPass(ForceFunctionAttrsPass()); - if (PGOOpt && PGOOpt->SamplePGOSupport) - MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); + if (PGOOpt) { + if (PGOOpt->SamplePGOSupport) + MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); + if (!PGOOpt->ProfileGenFile.empty() || !PGOOpt->ProfileUseFile.empty()) { + addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen, + PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile); + if (!PGOOpt->ProfileUseFile.empty()) + MPM.addPass(PGOIndirectCallPromotion(false, false)); + } else if (!PGOOpt->SampleProfileFile.empty()) + MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile)); + } // If we are planning to perform ThinLTO later, we don't bloat the code with // unrolling/vectorization/... now. Just simplify the module as much as we // can. @@ -838,12 +838,15 @@ // Force any function attributes we want the rest of the pipeline to observe. MPM.addPass(ForceFunctionAttrsPass()); + // Invoke the SampleProfileLoader the 2nd time to annotate profile. + if (PGOOpt && !PGOOpt->SampleProfileFile.empty()) + MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile)); + // During the ThinLTO backend phase we perform early indirect call promotion // here, before globalopt. Otherwise imported available_externally functions // look unreferenced and are removed. MPM.addPass(PGOIndirectCallPromotion( - true /* InLTO */, PGOOpt && !PGOOpt->SampleProfileFile.empty() && - !PGOOpt->ProfileUseFile.empty())); + true /* InLTO */, PGOOpt && !PGOOpt->SampleProfileFile.empty())); // Add the core simplification pipeline. 
MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging, Index: test/LTO/Resolution/X86/Inputs/load-sample-prof-icp.prof =================================================================== --- /dev/null +++ test/LTO/Resolution/X86/Inputs/load-sample-prof-icp.prof @@ -0,0 +1,2 @@ +test:1000:0 + 1: 1000 bar:1000 Index: test/LTO/Resolution/X86/load-sample-prof-icp.ll =================================================================== --- /dev/null +++ test/LTO/Resolution/X86/load-sample-prof-icp.ll @@ -0,0 +1,46 @@ +; Test that llvm-lto2 handles the -lto-sample-profile-file option and passes it +; down to the ICP correctly. +; +; RUN: opt -module-summary < %s -o %t.bc +; RUN: llvm-lto2 run -o %t.out %t.bc -save-temps \ +; RUN: -r %t.bc,test,px -r %t.bc,bar,x \ +; RUN: -lto-sample-profile-file=%S/Inputs/load-sample-prof-icp.prof +; RUN: llvm-dis %t.out.0.4.opt.bc -o - | FileCheck %s +; RUN: llvm-lto2 run -o %t.out %t.bc -save-temps \ +; RUN: -r %t.bc,test,px -r %t.bc,bar,x -use-new-pm \ +; RUN: -lto-sample-profile-file=%S/Inputs/load-sample-prof-icp.prof +; RUN: llvm-dis %t.out.0.4.opt.bc -o - | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-LABEL: @test +; Checks that the call instruction is promoted to a direct call and has +; profile count annotated on the direct call. 
+define void @test(void ()*) !dbg !7 { + %2 = alloca void ()* + store void ()* %0, void ()** %2 + %3 = load void ()*, void ()** %2 + ; CHECK: call void @bar(),{{.*}}!prof + call void %3(), !dbg !10 + ret void +} + +declare void @bar() local_unnamed_addr + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 6.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "test.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 6.0.0 "} +!7 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 3, type: !8, isLocal: false, isDefinition: true, scopeLine: 3, isOptimized: true, unit: !0, variables: !2) +!8 = !DISubroutineType(types: !9) +!9 = !{null} +!10 = !DILocation(line: 4, column: 5, scope: !7) +!11 = !DILocation(line: 5, column: 1, scope: !7)