Index: include/llvm/Passes/PassBuilder.h =================================================================== --- include/llvm/Passes/PassBuilder.h +++ include/llvm/Passes/PassBuilder.h @@ -44,6 +44,7 @@ class PassBuilder { TargetMachine *TM; Optional<PGOOptions> PGOOpt; + bool PrepareForThinLTO = false; public: /// \brief LLVM-provided high-level optimization levels. @@ -133,8 +134,9 @@ }; explicit PassBuilder(TargetMachine *TM = nullptr, - Optional<PGOOptions> PGOOpt = None) - : TM(TM), PGOOpt(PGOOpt) {} + Optional<PGOOptions> PGOOpt = None, + bool PrepareForThinLTO = false) + : TM(TM), PGOOpt(PGOOpt), PrepareForThinLTO(PrepareForThinLTO) {} /// \brief Cross register the analysis managers through their proxies. /// @@ -204,7 +206,8 @@ /// require some transformations for semantic reasons, they should explicitly /// build them. ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, - bool DebugLogging = false); + bool DebugLogging = false, + bool PerformThinLTO = false); /// Build a pre-link, LTO-targeting default optimization pipeline to a pass /// manager. @@ -235,6 +238,20 @@ ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging = false); + /// Build a ThinLTO default optimization pipeline to a pass manager. + /// + /// This provides a good default optimization pipeline for link-time + /// optimization and code generation. It is particularly tuned to fit well + /// when IR coming into the LTO phase was first run through \c + /// addPreLinkLTODefaultPipeline, and the two coordinate closely. + /// + /// Note that \p Level cannot be `O0` here. The pipelines produced are + /// only intended for use when attempting to optimize code. If frontends + /// require some transformations for semantic reasons, they should explicitly + /// build them. + ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, + bool DebugLogging = false); + /// Build the default `AAManager` with the default alias analysis pipeline /// registered. 
AAManager buildDefaultAAPipeline(); Index: lib/CodeGen/BackendUtil.cpp =================================================================== --- lib/CodeGen/BackendUtil.cpp +++ lib/CodeGen/BackendUtil.cpp @@ -859,7 +859,8 @@ PassBuilder PB(TM.get(), (PGOOpt.RunProfileGen || !PGOOpt.ProfileUseFile.empty()) ? - Optional<PGOOptions>(PGOOpt) : None); + Optional<PGOOptions>(PGOOpt) : None, + CodeGenOpts.EmitSummaryIndex /* PrepareForThinLTO */); LoopAnalysisManager LAM; FunctionAnalysisManager FAM; Index: lib/LTO/LTOBackend.cpp =================================================================== --- lib/LTO/LTOBackend.cpp +++ lib/LTO/LTOBackend.cpp @@ -136,7 +136,8 @@ Conf.CodeModel, Conf.CGOptLevel)); } -static void runNewPMPasses(Module &Mod, TargetMachine *TM, unsigned OptLevel) { +static void runNewPMPasses(Module &Mod, TargetMachine *TM, unsigned OptLevel, + bool IsThinLTO) { PassBuilder PB(TM); AAManager AA; @@ -180,7 +181,10 @@ break; } - MPM = PB.buildLTODefaultPipeline(OL, false /* DebugLogging */); + if (IsThinLTO) + MPM = PB.buildThinLTODefaultPipeline(OL, false /* DebugLogging */); + else + MPM = PB.buildLTODefaultPipeline(OL, false /* DebugLogging */); MPM.run(Mod, MAM); // FIXME (davide): verify the output. @@ -258,17 +262,12 @@ bool opt(Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, bool IsThinLTO, ModuleSummaryIndex *ExportSummary, const ModuleSummaryIndex *ImportSummary) { - // There's still no ThinLTO pipeline hooked up in the new pass manager, - // once there is one, we can just remove this. - if (LTOUseNewPM && IsThinLTO) - report_fatal_error("ThinLTO not supported with the new PM yet!"); - // FIXME: Plumb the combined index into the new pass manager. 
if (!Conf.OptPipeline.empty()) runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline, Conf.DisableVerify); else if (LTOUseNewPM) - runNewPMPasses(Mod, TM, Conf.OptLevel); + runNewPMPasses(Mod, TM, Conf.OptLevel, IsThinLTO); else runOldPMPasses(Conf, Mod, TM, IsThinLTO, ExportSummary, ImportSummary); return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod); Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -452,14 +452,39 @@ } ModulePassManager +PassBuilder::buildThinLTODefaultPipeline(OptimizationLevel Level, + bool DebugLogging) +{ + // FIXME: The summary index is not hooked in the new pass manager yet. + // When it's going to be hooked, enable WholeProgramDevirt and LowerTypeTest + // here. + + return buildPerModuleDefaultPipeline(Level, DebugLogging, + true /* PerformThinLTO */); +} + +ModulePassManager PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, - bool DebugLogging) { + bool DebugLogging, + bool PerformThinLTO) { assert(Level != O0 && "Must request optimizations for the default pipeline!"); ModulePassManager MPM(DebugLogging); // Force any function attributes we want the rest of the pipeline te observe. MPM.addPass(ForceFunctionAttrsPass()); + // For ThinLTO there are two passes of indirect call promotion. The + // first is during the compile phase when PerformThinLTO=false and + // intra-module indirect call targets are promoted. The second is during + // the ThinLTO backend when PerformThinLTO=true, when we promote imported + // inter-module indirect calls. For that we perform indirect call promotion + // earlier in the pass pipeline, here before globalopt. Otherwise imported + // available_externally functions look unreferenced and are removed. 
+ if (PerformThinLTO) + MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, + PGOOpt && PGOOpt->SamplePGO && + !PGOOpt->ProfileUseFile.empty())); + // Do basic inference of function attributes from known properties of system // libraries and other oracles. MPM.addPass(InferFunctionAttrsPass()); @@ -502,16 +527,19 @@ GlobalCleanupPM.addPass(SimplifyCFGPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM))); - // Add all the requested passes for PGO Instrumentation, if requested. - if (PGOOpt) { - assert(PGOOpt->RunProfileGen || PGOOpt->SamplePGO || - !PGOOpt->ProfileUseFile.empty()); - addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen, - PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile); - } + // FIXME: `PrepareForThinLTOUsingPGOSampleProfile`. + if (!PerformThinLTO) { + // Add all the requested passes for PGO Instrumentation, if requested. + if (PGOOpt) { + assert(PGOOpt->RunProfileGen || PGOOpt->SamplePGO || + !PGOOpt->ProfileUseFile.empty()); + addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen, + PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile); + } - // Indirect call promotion that promotes intra-module targes only. - MPM.addPass(PGOIndirectCallPromotion(false, PGOOpt && PGOOpt->SamplePGO)); + // Indirect call promotion that promotes intra-module targets only. + MPM.addPass(PGOIndirectCallPromotion(false, PGOOpt && PGOOpt->SamplePGO)); + } // Require the GlobalsAA analysis for the module so we can query it within // the CGSCC pipeline. @@ -571,13 +599,37 @@ // Eliminate externally available functions now that inlining is over -- we // won't emit these anyways. - MPM.addPass(EliminateAvailableExternallyPass()); + if (!PrepareForThinLTO) + // Remove avail extern fns and globals definitions if we aren't + // compiling an object file for later LTO. For LTO we want to preserve + // these so they are eligible for inlining at link-time. 
Note if they + // are unreferenced they will be removed by GlobalDCE later, so + // this only impacts referenced available externally globals. + // Eventually they will be suppressed during codegen, but eliminating + // here enables more opportunity for GlobalDCE as it may make + // globals referenced by available external functions dead + // and saves running remaining passes on the eliminated functions. + MPM.addPass(EliminateAvailableExternallyPass()); // Do RPO function attribute inference across the module to forward-propagate // attributes where applicable. // FIXME: Is this really an optimization rather than a canonicalization? MPM.addPass(ReversePostOrderFunctionAttrsPass()); + + // If we are planning to perform ThinLTO later, let's not bloat the code with + // unrolling/vectorization/... now. We'll first run the inliner + CGSCC passes + // during ThinLTO and perform the rest of the optimizations afterward. + if (PrepareForThinLTO) { + // Reduce the size of the IR as much as possible. + MPM.addPass(GlobalOptPass()); + + // Rename anon globals to be able to export them in the summary. + MPM.addPass(NameAnonGlobalPass()); + return MPM; + } + + // Re-require GloblasAA here prior to function passes. This is particularly // useful as the above will have inlined, DCE'ed, and function-attr // propagated everything. We should at this point have a reasonably minimal @@ -587,6 +639,11 @@ // memory operations. MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>()); + // Optimize globals now when performing ThinLTO, this enables more + // optimizations later. 
+ if (PerformThinLTO) + MPM.addPass(GlobalOptPass()); + FunctionPassManager OptimizePM(DebugLogging); OptimizePM.addPass(Float2IntPass()); // FIXME: We need to run some loop optimizations to re-rotate loops after Index: test/ThinLTO/X86/error-newpm.ll =================================================================== --- test/ThinLTO/X86/error-newpm.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: opt -module-summary %s -o %t1.bc -; RUN: not llvm-lto2 run %t1.bc -o %t.o \ -; RUN: -r=%t1.bc,_tinkywinky,pxl \ -; RUN: -lto-use-new-pm 2>&1 | FileCheck %s - -; CHECK: ThinLTO not supported with the new PM yet! - -target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.11.0" - -define void @tinkywinky() { - ret void -} Index: test/ThinLTO/X86/newpm-basic.ll =================================================================== --- /dev/null +++ test/ThinLTO/X86/newpm-basic.ll @@ -0,0 +1,11 @@ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: llvm-lto2 run %t1.bc -o %t.o \ +; RUN: -r=%t1.bc,_tinkywinky,pxl \ +; RUN: -lto-use-new-pm + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +define void @tinkywinky() { + ret void +}