Index: include/llvm/Passes/PassBuilder.h =================================================================== --- include/llvm/Passes/PassBuilder.h +++ include/llvm/Passes/PassBuilder.h @@ -192,6 +192,39 @@ buildFunctionSimplificationPipeline(OptimizationLevel Level, bool DebugLogging = false); + /// Construct the core LLVM module canonicalization and simplification + /// pipeline. + /// + /// This pipeline focuses on canonicalizing and simplifying the entire module + /// of IR. Much like the function simplification pipeline above, it is + /// suitable to run repeatedly over the IR and is not expected to destroy + /// important information. It does, however, perform inlining and other + /// heuristic based simplifications that are not strictly reversible. + /// + /// Note that \p Level cannot be `O0` here. The pipelines produced are + /// only intended for use when attempting to optimize code. If frontends + /// require some transformations for semantic reasons, they should explicitly + /// build them. + ModulePassManager + buildModuleSimplificationPipeline(OptimizationLevel Level, + bool DebugLogging = false); + + /// Construct the core LLVM module optimization pipeline. + /// + /// This pipeline focuses on optimizing the execution speed of the IR. It + /// uses cost modeling and thresholds to balance code growth against runtime + /// improvements. It includes vectorization and other information destroying + /// transformations. It also cannot generally be run repeatedly on a module + /// without potentially seriously regressing either runtime performance of + /// the code or serious code size growth. + /// + /// Note that \p Level cannot be `O0` here. The pipelines produced are + /// only intended for use when attempting to optimize code. If frontends + /// require some transformations for semantic reasons, they should explicitly + /// build them. + ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, + bool DebugLogging = false); + /// Build a per-module default optimization pipeline. /// /// This provides a good default optimization pipeline for per-module @@ -206,6 +239,36 @@ ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, bool DebugLogging = false); + /// Build a pre-link, ThinLTO-targeting default optimization pipeline to + /// a pass manager. + /// + /// This adds the pre-link optimizations tuned to prepare a module for + /// a ThinLTO run. It works to minimize the IR which needs to be analyzed + /// without making irreversible decisions which could be made better during + /// the LTO run. + /// + /// Note that \p Level cannot be `O0` here. The pipelines produced are + /// only intended for use when attempting to optimize code. If frontends + /// require some transformations for semantic reasons, they should explicitly + /// build them. + ModulePassManager + buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level, + bool DebugLogging = false); + + /// Build an ThinLTO default optimization pipeline to a pass manager. + /// + /// This provides a good default optimization pipeline for link-time + /// optimization and code generation. It is particularly tuned to fit well + /// when IR coming into the LTO phase was first run through \c + /// addPreLinkLTODefaultPipeline, and the two coordinate closely. + /// + /// Note that \p Level cannot be `O0` here. The pipelines produced are + /// only intended for use when attempting to optimize code. If frontends + /// require some transformations for semantic reasons, they should explicitly + /// build them. + ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, + bool DebugLogging = false); + /// Build a pre-link, LTO-targeting default optimization pipeline to a pass /// manager. /// Index: lib/LTO/LTOBackend.cpp =================================================================== --- lib/LTO/LTOBackend.cpp +++ lib/LTO/LTOBackend.cpp @@ -136,7 +136,8 @@ Conf.CodeModel, Conf.CGOptLevel)); } -static void runNewPMPasses(Module &Mod, TargetMachine *TM, unsigned OptLevel) { +static void runNewPMPasses(Module &Mod, TargetMachine *TM, unsigned OptLevel, + bool IsThinLTO) { PassBuilder PB(TM); AAManager AA; @@ -180,7 +181,10 @@ break; } - MPM = PB.buildLTODefaultPipeline(OL, false /* DebugLogging */); + if (IsThinLTO) + MPM = PB.buildThinLTODefaultPipeline(OL, false /* DebugLogging */); + else + MPM = PB.buildLTODefaultPipeline(OL, false /* DebugLogging */); MPM.run(Mod, MAM); // FIXME (davide): verify the output. @@ -258,17 +262,12 @@ bool opt(Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, bool IsThinLTO, ModuleSummaryIndex *ExportSummary, const ModuleSummaryIndex *ImportSummary) { - // There's still no ThinLTO pipeline hooked up in the new pass manager, - // once there is one, we can just remove this. - if (LTOUseNewPM && IsThinLTO) - report_fatal_error("ThinLTO not supported with the new PM yet!"); - // FIXME: Plumb the combined index into the new pass manager. if (!Conf.OptPipeline.empty()) runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline, Conf.DisableVerify); else if (LTOUseNewPM) - runNewPMPasses(Mod, TM, Conf.OptLevel); + runNewPMPasses(Mod, TM, Conf.OptLevel, IsThinLTO); else runOldPMPasses(Conf, Mod, TM, IsThinLTO, ExportSummary, ImportSummary); return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod); Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -348,6 +348,9 @@ LPM2.addPass(IndVarSimplifyPass()); LPM2.addPass(LoopIdiomRecognizePass()); LPM2.addPass(LoopDeletionPass()); + // FIXME: The old pass manager has a hack to disable loop unrolling during + // ThinLTO when using sample PGO. Need to either fix it or port some + // workaround. LPM2.addPass(LoopUnrollPass::createFull(Level)); // We provide the opt remark emitter pass for LICM to use. We only need to do @@ -457,14 +460,10 @@ } ModulePassManager -PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, - bool DebugLogging) { - assert(Level != O0 && "Must request optimizations for the default pipeline!"); +PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, + bool DebugLogging) { ModulePassManager MPM(DebugLogging); - // Force any function attributes we want the rest of the pipeline te observe. - MPM.addPass(ForceFunctionAttrsPass()); - // Do basic inference of function attributes from known properties of system // libraries and other oracles. MPM.addPass(InferFunctionAttrsPass()); @@ -507,16 +506,16 @@ GlobalCleanupPM.addPass(SimplifyCFGPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM))); - // Add all the requested passes for PGO Instrumentation, if requested. + // Add all the requested passes for PGO, if requested. if (PGOOpt) { assert(PGOOpt->RunProfileGen || PGOOpt->SamplePGO || !PGOOpt->ProfileUseFile.empty()); addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen, PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile); - } - // Indirect call promotion that promotes intra-module targes only. - MPM.addPass(PGOIndirectCallPromotion(false, PGOOpt && PGOOpt->SamplePGO)); + // Indirect call promotion that promotes intra-module targes only. + MPM.addPass(PGOIndirectCallPromotion(false, PGOOpt && PGOOpt->SamplePGO)); + } // Require the GlobalsAA analysis for the module so we can query it within // the CGSCC pipeline. @@ -565,17 +564,30 @@ createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass( std::move(MainCGPipeline), MaxDevirtIterations, DebugLogging))); - // This ends the canonicalization and simplification phase of the pipeline. - // At this point, we expect to have canonical and simple IR which we begin - // *optimizing* for efficient execution going forward. + return MPM; +} + +ModulePassManager +PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, + bool DebugLogging) { + ModulePassManager MPM(DebugLogging); + + // Optimize globals now that the module is fully simplified. + MPM.addPass(GlobalOptPass()); // Run partial inlining pass to partially inline functions that have // large bodies. if (RunPartialInlining) MPM.addPass(PartialInlinerPass()); - // Eliminate externally available functions now that inlining is over -- we - // won't emit these anyways. + // Remove avail extern fns and globals definitions if we aren't compiling an + // object file for later LTO. For LTO we want to preserve these so they are + // eligible for inlining at link-time. Note if they are unreferenced they + // will be removed by GlobalDCE later, so this only impacts referenced + // available externally globals. Eventually they will be suppressed during + // codegen, but eliminating here enables more opportunity for GlobalDCE as it + // may make globals referenced by available external functions dead and saves + // running remaining passes on the eliminated functions. MPM.addPass(EliminateAvailableExternallyPass()); // Do RPO function attribute inference across the module to forward-propagate @@ -674,6 +686,77 @@ } ModulePassManager +PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, + bool DebugLogging) { + assert(Level != O0 && "Must request optimizations for the default pipeline!"); + + ModulePassManager MPM(DebugLogging); + + // Force any function attributes we want the rest of the pipeline te observe. + MPM.addPass(ForceFunctionAttrsPass()); + + // Add the core simplification pipeline. + MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging)); + + // Now add the optimization pipeline. + MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging)); + + return MPM; +} + +ModulePassManager +PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level, + bool DebugLogging) { + assert(Level != O0 && "Must request optimizations for the default pipeline!"); + + ModulePassManager MPM(DebugLogging); + + // Force any function attributes we want the rest of the pipeline te observe. + MPM.addPass(ForceFunctionAttrsPass()); + + // If we are planning to perform ThinLTO later, we don't bloat the code with + // unrolling/vectorization/... now. Just simplify the module as much as we + // can. + MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging)); + + // Reduce the size of the IR as much as possible. + MPM.addPass(GlobalOptPass()); + + // Rename anon globals to be able to export them in the summary. + MPM.addPass(NameAnonGlobalPass()); + + return MPM; +} + +ModulePassManager +PassBuilder::buildThinLTODefaultPipeline(OptimizationLevel Level, + bool DebugLogging) { + // FIXME: The summary index is not hooked in the new pass manager yet. + // When it's going to be hooked, enable WholeProgramDevirt and LowerTypeTest + // here. + + ModulePassManager MPM(DebugLogging); + + // Force any function attributes we want the rest of the pipeline te observe. + MPM.addPass(ForceFunctionAttrsPass()); + + // During the ThinLTO backend phase we perform early indirect call promotion + // here, before globalopt. Otherwise imported available_externally functions + // look unreferenced and are removed. + MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, + PGOOpt && PGOOpt->SamplePGO && + !PGOOpt->ProfileUseFile.empty())); + + // Add the core simplification pipeline. + MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging)); + + // Now add the optimization pipeline. + MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging)); + + return MPM; +} + +ModulePassManager PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level, bool DebugLogging) { assert(Level != O0 && "Must request optimizations for the default pipeline!"); Index: test/Other/new-pm-defaults.ll =================================================================== --- test/Other/new-pm-defaults.ll +++ test/Other/new-pm-defaults.ll @@ -30,6 +30,8 @@ ; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> ; CHECK-O-NEXT: Starting llvm::Module pass manager run. ; CHECK-O-NEXT: Running pass: ForceFunctionAttrsPass +; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> +; CHECK-O-NEXT: Starting llvm::Module pass manager run. ; CHECK-O-NEXT: Running pass: InferFunctionAttrsPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> @@ -53,7 +55,6 @@ ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. -; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA ; CHECK-O-NEXT: Running analysis: GlobalsAA ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis @@ -133,6 +134,10 @@ ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. ; CHECK-O-NEXT: Finished CGSCC pass manager run. +; CHECK-O-NEXT: Finished llvm::Module pass manager run. +; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> +; CHECK-O-NEXT: Starting llvm::Module pass manager run. +; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Running pass: EliminateAvailableExternallyPass ; CHECK-O-NEXT: Running pass: ReversePostOrderFunctionAttrsPass ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA @@ -162,6 +167,7 @@ ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O-NEXT: Running pass: ConstantMergePass ; CHECK-O-NEXT: Finished llvm::Module pass manager run. +; CHECK-O-NEXT: Finished llvm::Module pass manager run. ; CHECK-O-NEXT: Running pass: PrintModulePass ; ; Make sure we get the IR back out without changes when we print the module. Index: test/ThinLTO/X86/newpm-basic.ll =================================================================== --- test/ThinLTO/X86/newpm-basic.ll +++ test/ThinLTO/X86/newpm-basic.ll @@ -1,9 +1,7 @@ ; RUN: opt -module-summary %s -o %t1.bc -; RUN: not llvm-lto2 run %t1.bc -o %t.o \ +; RUN: llvm-lto2 run %t1.bc -o %t.o \ ; RUN: -r=%t1.bc,_tinkywinky,pxl \ -; RUN: -lto-use-new-pm 2>&1 | FileCheck %s - -; CHECK: ThinLTO not supported with the new PM yet! +; RUN: -lto-use-new-pm target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.11.0"