diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -343,6 +343,12 @@ ThinLTOPhase Phase, bool DebugLogging = false); + /// Construct the module pipeline that performs inlining as well as + /// the inlining-driven cleanups. + ModulePassManager buildInlinerPipeline(OptimizationLevel Level, + ThinLTOPhase Phase, + bool DebugLogging = false); + /// Construct the core LLVM module optimization pipeline. /// /// This pipeline focuses on optimizing the execution speed of the IR. It diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -690,10 +690,73 @@ return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel()); } -ModulePassManager -PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, - ThinLTOPhase Phase, - bool DebugLogging) { +ModulePassManager PassBuilder::buildInlinerPipeline(OptimizationLevel Level, + ThinLTOPhase Phase, + bool DebugLogging) { + ModulePassManager MPM(DebugLogging); + + // Now begin the main postorder CGSCC pipeline. + // FIXME: The current CGSCC pipeline has its origins in the legacy pass + // manager and trying to emulate its precise behavior. Much of this doesn't + // make a lot of sense and we should revisit the core CGSCC structure. + CGSCCPassManager MainCGPipeline(DebugLogging); + + // Note: historically, the PruneEH pass was run first to deduce nounwind and + // generally clean up exception handling overhead. It isn't clear this is + // valuable as the inliner doesn't currently care whether it is inlining an + // invoke or a call. + + // Run the inliner first. The theory is that we are walking bottom-up and so + // the callees have already been fully optimized, and we want to inline them + // into the callers so that our optimizations can reflect that. 
+ // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO + // because it makes profile annotation in the backend inaccurate. + InlineParams IP = getInlineParamsFromOptLevel(Level); + if (Phase == ThinLTOPhase::PreLink && PGOOpt && + PGOOpt->Action == PGOOptions::SampleUse) + IP.HotCallSiteThreshold = 0; + MainCGPipeline.addPass(InlinerPass(IP)); + + if (AttributorRun & AttributorRunOption::CGSCC) + MainCGPipeline.addPass(AttributorCGSCCPass()); + + if (PTO.Coroutines) + MainCGPipeline.addPass(CoroSplitPass()); + + // Now deduce any function attributes based on the current code. + MainCGPipeline.addPass(PostOrderFunctionAttrsPass()); + + // When at O3 add argument promotion to the pass pipeline. + // FIXME: It isn't at all clear why this should be limited to O3. + if (Level == OptimizationLevel::O3) + MainCGPipeline.addPass(ArgumentPromotionPass()); + + // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if + // there are no OpenMP runtime calls present in the module. + if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3) + MainCGPipeline.addPass(OpenMPOptPass()); + + // Lastly, add the core function simplification pipeline nested inside the + // CGSCC walk. + MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( + buildFunctionSimplificationPipeline(Level, Phase, DebugLogging))); + + for (auto &C : CGSCCOptimizerLateEPCallbacks) + C(MainCGPipeline, Level); + + // We wrap the CGSCC pipeline in a devirtualization repeater. This will try + // to detect when we devirtualize indirect calls and iterate the SCC passes + // in that case to try and catch knock-on inlining or function attrs + // opportunities. Then we add it to the module pipeline by walking the SCCs + // in postorder (or bottom-up). 
+ MPM.addPass( + createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass( + std::move(MainCGPipeline), MaxDevirtIterations))); + return MPM; +} + +ModulePassManager PassBuilder::buildModuleSimplificationPipeline( + OptimizationLevel Level, ThinLTOPhase Phase, bool DebugLogging) { ModulePassManager MPM(DebugLogging); bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse); @@ -830,64 +893,7 @@ // the inliner pass. MPM.addPass(RequireAnalysisPass()); - // Now begin the main postorder CGSCC pipeline. - // FIXME: The current CGSCC pipeline has its origins in the legacy pass - // manager and trying to emulate its precise behavior. Much of this doesn't - // make a lot of sense and we should revisit the core CGSCC structure. - CGSCCPassManager MainCGPipeline(DebugLogging); - - // Note: historically, the PruneEH pass was run first to deduce nounwind and - // generally clean up exception handling overhead. It isn't clear this is - // valuable as the inliner doesn't currently care whether it is inlining an - // invoke or a call. - - // Run the inliner first. The theory is that we are walking bottom-up and so - // the callees have already been fully optimized, and we want to inline them - // into the callers so that our optimizations can reflect that. - // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO - // because it makes profile annotation in the backend inaccurate. - InlineParams IP = getInlineParamsFromOptLevel(Level); - if (Phase == ThinLTOPhase::PreLink && PGOOpt && - PGOOpt->Action == PGOOptions::SampleUse) - IP.HotCallSiteThreshold = 0; - MainCGPipeline.addPass(InlinerPass(IP)); - - if (AttributorRun & AttributorRunOption::CGSCC) - MainCGPipeline.addPass(AttributorCGSCCPass()); - - if (PTO.Coroutines) - MainCGPipeline.addPass(CoroSplitPass()); - - // Now deduce any function attributes based in the current code. 
- MainCGPipeline.addPass(PostOrderFunctionAttrsPass()); - - // When at O3 add argument promotion to the pass pipeline. - // FIXME: It isn't at all clear why this should be limited to O3. - if (Level == OptimizationLevel::O3) - MainCGPipeline.addPass(ArgumentPromotionPass()); - - // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if - // there are no OpenMP runtime calls present in the module. - if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3) - MainCGPipeline.addPass(OpenMPOptPass()); - - // Lastly, add the core function simplification pipeline nested inside the - // CGSCC walk. - MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( - buildFunctionSimplificationPipeline(Level, Phase, DebugLogging))); - - for (auto &C : CGSCCOptimizerLateEPCallbacks) - C(MainCGPipeline, Level); - - // We wrap the CGSCC pipeline in a devirtualization repeater. This will try - // to detect when we devirtualize indirect calls and iterate the SCC passes - // in that case to try and catch knock-on inlining or function attrs - // opportunities. Then we add it to the module pipeline by walking the SCCs - // in postorder (or bottom-up). - MPM.addPass( - createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass( - std::move(MainCGPipeline), MaxDevirtIterations))); - + MPM.addPass(buildInlinerPipeline(Level, Phase, DebugLogging)); return MPM; } @@ -1260,11 +1266,11 @@ // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. - MPM.addPass(IPSCCPPass()); + MPM.addPass(IPSCCPPass()); - // Attach metadata to indirect call sites indicating the set of functions - // they may target at run-time. This should follow IPSCCP. - MPM.addPass(CalledValuePropagationPass()); + // Attach metadata to indirect call sites indicating the set of functions + // they may target at run-time. 
This should follow IPSCCP. + MPM.addPass(CalledValuePropagationPass()); } // Now deduce any function attributes based in the current code. diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -82,6 +82,8 @@ MODULE_PASS("rewrite-symbols", RewriteSymbolPass()) MODULE_PASS("rpo-functionattrs", ReversePostOrderFunctionAttrsPass()) MODULE_PASS("sample-profile", SampleProfileLoaderPass()) +MODULE_PASS("scc-oz-module-inliner", + buildInlinerPipeline(OptimizationLevel::Oz, ThinLTOPhase::None, DebugLogging)) MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass()) MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation()) MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass(nullptr, nullptr)) diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -132,6 +132,8 @@ ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis +; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> +; CHECK-O-NEXT: Starting llvm::Module pass manager run. ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis @@ -226,6 +228,7 @@ ; CHECK-EP-CGSCC-LATE-NEXT: Running pass: NoOpCGSCCPass ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished llvm::Module pass manager run. +; CHECK-O-NEXT: Finished llvm::Module pass manager run. ; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> ; CHECK-O-NEXT: Starting llvm::Module pass manager run. 
; CHECK-O-NEXT: Running pass: GlobalOptPass diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -97,6 +97,8 @@ ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-PRELINK-O-NEXT: Running analysis: ProfileSummaryAnalysis +; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> +; CHECK-O-NEXT: Starting llvm::Module pass manager run. ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis @@ -196,6 +198,7 @@ ; CHECK-O-NEXT: Finished llvm::Function pass manager run. ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished llvm::Module pass manager run. +; CHECK-O-NEXT: Finished llvm::Module pass manager run. ; CHECK-PRELINK-O-NEXT: Running pass: GlobalOptPass ; CHECK-POSTLINK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> ; CHECK-POSTLINK-O-NEXT: Starting llvm::Module pass manager run. diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -69,6 +69,8 @@ ; CHECK-O-NEXT: Running analysis: GlobalsAA ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis +; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> +; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. 
; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis @@ -167,6 +169,7 @@ ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. +; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: GlobalOptPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -77,6 +77,8 @@ ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA ; CHECK-O-NEXT: Running analysis: GlobalsAA ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis +; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> +; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis @@ -178,6 +180,7 @@ ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. +; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. 
; CHECK-O-NEXT: Running pass: GlobalOptPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -96,6 +96,8 @@ ; CHECK-O-NEXT: Running analysis: GlobalsAA ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis +; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> +; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis @@ -216,6 +218,7 @@ ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. +; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis on bar ; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis on bar diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -77,6 +77,8 @@ ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA ; CHECK-O-NEXT: Running analysis: GlobalsAA ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis +; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> +; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. 
; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis @@ -177,6 +179,7 @@ ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. +; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: NameAnonGlobalPass diff --git a/llvm/test/Transforms/Inline/module-inlining.ll b/llvm/test/Transforms/Inline/module-inlining.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Inline/module-inlining.ll @@ -0,0 +1,27 @@ +; modify_value will be inlined into main. With just the inliner pass, at most +; some trivial DCE would happen, which in this case doesn't modify post-inlined +; main much. +; In contrast, with the full set of module inliner-related passes, at the end of +; inlining (incl. function cleanups run after inlining), main will be reduced to +; a 'ret i32 10' +; +; RUN: opt -passes=inline -S < %s | FileCheck %s --check-prefix=INLINE --check-prefix=CHECK +; RUN: opt -passes=scc-oz-module-inliner -S < %s | FileCheck %s --check-prefix=MODULE --check-prefix=CHECK + +define void @modify_value({i32, float}* %v) { + %f = getelementptr { i32, float }, { i32, float }* %v, i64 0, i32 0 + store i32 10, i32* %f + ret void +} + +define i32 @main() { + %my_val = alloca {i32, float} + call void @modify_value({i32, float}* %my_val) + %f = getelementptr { i32, float }, { i32, float }* %my_val, i64 0, i32 0 + %ret = load i32, i32* %f + ret i32 %ret +} + +; CHECK-LABEL: @main +; INLINE-NEXT: %my_val = alloca +; MODULE-NEXT: ret i32 10 \ No newline at end of file