diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -923,7 +923,7 @@
     llvm_unreachable("Invalid optimization level!");
 
   case 1:
-    return PassBuilder::O1;
+    return PassBuilder::OptimizationLevel::O1;
 
   case 2:
     switch (Opts.OptimizeSize) {
@@ -931,17 +931,17 @@
       llvm_unreachable("Invalid optimization level for size!");
 
     case 0:
-      return PassBuilder::O2;
+      return PassBuilder::OptimizationLevel::O2;
 
     case 1:
-      return PassBuilder::Os;
+      return PassBuilder::OptimizationLevel::Os;
 
     case 2:
-      return PassBuilder::Oz;
+      return PassBuilder::OptimizationLevel::Oz;
     }
 
   case 3:
-    return PassBuilder::O3;
+    return PassBuilder::OptimizationLevel::O3;
   }
 }
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -143,11 +143,16 @@
   ///
   /// This enumerates the LLVM-provided high-level optimization levels. Each
   /// level has a specific goal and rationale.
-  enum OptimizationLevel {
+  class OptimizationLevel final {
+    unsigned Level;
+    OptimizationLevel(unsigned L) : Level(L) {}
+
+  public:
+    OptimizationLevel() : OptimizationLevel(0) {}
     /// Disable as many optimizations as possible. This doesn't completely
     /// disable the optimizer in all cases, for example always_inline functions
     /// can be required to be inlined for correctness.
-    O0,
+    static const OptimizationLevel O0;
 
     /// Optimize quickly without destroying debuggability.
     ///
@@ -161,10 +166,9 @@
     ///
     /// As an example, complex loop transformations such as versioning,
     /// vectorization, or fusion don't make sense here due to the degree to
-    /// which the executed code differs from the source code, and the compile time
-    /// cost.
-    O1,
-
+    /// which the executed code differs from the source code, and the compile
+    /// time cost.
+    static const OptimizationLevel O1;
     /// Optimize for fast execution as much as possible without triggering
     /// significant incremental compile time or code size growth.
     ///
@@ -181,8 +185,7 @@
     ///
     /// This is expected to be a good default optimization level for the vast
     /// majority of users.
-    O2,
-
+    static const OptimizationLevel O2;
     /// Optimize for fast execution as much as possible.
     ///
     /// This mode is significantly more aggressive in trading off compile time
@@ -197,8 +200,7 @@
     /// order to make even significantly slower compile times at least scale
     /// reasonably. This does not preclude very substantial constant factor
     /// costs though.
-    O3,
-
+    static const OptimizationLevel O3;
     /// Similar to \c O2 but tries to optimize for small code size instead of
     /// fast execution without triggering significant incremental execution
     /// time slowdowns.
@@ -209,8 +211,7 @@
     /// A consequence of the different core goal is that this should in general
     /// produce substantially smaller executables that still run in
     /// a reasonable amount of time.
-    Os,
-
+    static const OptimizationLevel Os;
     /// A very specialized mode that will optimize for code size at any and all
     /// costs.
     ///
@@ -218,7 +219,30 @@
     /// any effort taken to reduce the size is worth it regardless of the
     /// execution time impact. You should expect this level to produce rather
     /// slow, but very small, code.
-    Oz
+    static const OptimizationLevel Oz;
+
+    bool isOptimizingForSpeed() const { return Level > 0 && Level < 4; }
+    bool isOptimizingForSize() const { return Level == 4 || Level == 5; }
+    bool isO2Or3() const { return Level == 2 || Level == 3; }
+    bool operator==(const OptimizationLevel &Other) const {
+      return Level == Other.Level;
+    }
+    bool operator!=(const OptimizationLevel &Other) const {
+      return Level != Other.Level;
+    }
+
+    unsigned getSpeedupLevel() const {
+      if (isOptimizingForSpeed())
+        return Level;
+      return 0;
+    }
+
+    unsigned getSizeLevel() const {
+      // Map Os to 1 and Oz to 2, everything else to 0.
+      if (isOptimizingForSize())
+        return Level - 3;
+      return 0;
+    }
   };
 
   explicit PassBuilder(TargetMachine *TM = nullptr,
@@ -246,10 +270,10 @@
 
   /// Registers all available CGSCC analysis passes.
   ///
-  /// This is an interface that can be used to populate a \c CGSCCAnalysisManager
-  /// with all registered CGSCC analyses. Callers can still manually register any
-  /// additional analyses. Callers can also pre-register analyses and this will
-  /// not override those.
+  /// This is an interface that can be used to populate a \c
+  /// CGSCCAnalysisManager with all registered CGSCC analyses. Callers can still
+  /// manually register any additional analyses. Callers can also pre-register
+  /// analyses and this will not override those.
   void registerCGSCCAnalyses(CGSCCAnalysisManager &CGAM);
 
   /// Registers all available function analysis passes.
@@ -281,10 +305,8 @@
   /// build them.
   ///
   /// \p Phase indicates the current ThinLTO phase.
-  FunctionPassManager
-  buildFunctionSimplificationPipeline(OptimizationLevel Level,
-                                      ThinLTOPhase Phase,
-                                      bool DebugLogging = false);
+  FunctionPassManager buildFunctionSimplificationPipeline(
+      OptimizationLevel Level, ThinLTOPhase Phase, bool DebugLogging = false);
 
   /// Construct the core LLVM module canonicalization and simplification
   /// pipeline.
@@ -302,8 +324,7 @@
   ///
   /// \p Phase indicates the current ThinLTO phase.
   ModulePassManager
-  buildModuleSimplificationPipeline(OptimizationLevel Level,
-                                    ThinLTOPhase Phase,
+  buildModuleSimplificationPipeline(OptimizationLevel Level, ThinLTOPhase Phase,
                                     bool DebugLogging = false);
 
   /// Construct the core LLVM module optimization pipeline.
@@ -631,7 +652,6 @@
                               std::string ProfileFile,
                               std::string ProfileRemappingFile);
-
   /// Returns PIC. External libraries can use this to register pass
   /// instrumentation callbacks.
   PassInstrumentationCallbacks *getPassInstrumentationCallbacks() const {
@@ -767,6 +787,6 @@
   return false;
 }
 
-}
+} // namespace llvm
 
 #endif
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -203,16 +203,16 @@
   default:
     llvm_unreachable("Invalid optimization level");
   case 0:
-    OL = PassBuilder::O0;
+    OL = PassBuilder::OptimizationLevel::O0;
    break;
   case 1:
-    OL = PassBuilder::O1;
+    OL = PassBuilder::OptimizationLevel::O1;
     break;
   case 2:
-    OL = PassBuilder::O2;
+    OL = PassBuilder::OptimizationLevel::O2;
     break;
   case 3:
-    OL = PassBuilder::O3;
+    OL = PassBuilder::OptimizationLevel::O3;
     break;
   }
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -192,10 +192,9 @@
                                        cl::Hidden, cl::ZeroOrMore,
                                        cl::desc("Run Partial inlinining pass"));
 
-static cl::opt<bool>
-    RunNewGVN("enable-npm-newgvn", cl::init(false),
-              cl::Hidden, cl::ZeroOrMore,
-              cl::desc("Run NewGVN instead of GVN"));
+static cl::opt<bool> RunNewGVN("enable-npm-newgvn", cl::init(false), cl::Hidden,
+                               cl::ZeroOrMore,
+                               cl::desc("Run NewGVN instead of GVN"));
 
 static cl::opt<bool> EnableGVNHoist(
     "enable-npm-gvn-hoist", cl::init(false), cl::Hidden,
@@ -238,20 +237,12 @@
 
 extern cl::opt<bool> FlattenedProfileUsed;
 
-static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) {
-  switch (Level) {
-  case PassBuilder::O0:
-  case PassBuilder::O1:
-  case PassBuilder::O2:
-  case PassBuilder::O3:
-    return false;
-
-  case PassBuilder::Os:
-  case PassBuilder::Oz:
-    return true;
-  }
-  llvm_unreachable("Invalid optimization level!");
-}
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O0 = {0};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O1 = {1};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O2 = {2};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O3 = {3};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Os = {4};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Oz = {5};
 
 namespace {
@@ -386,11 +377,9 @@
     C(LAM);
 }
 
-FunctionPassManager
-PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
-                                                 ThinLTOPhase Phase,
-                                                 bool DebugLogging) {
-  assert(Level != O0 && "Must request optimizations!");
+FunctionPassManager PassBuilder::buildFunctionSimplificationPipeline(
+    OptimizationLevel Level, ThinLTOPhase Phase, bool DebugLogging) {
+  assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
 
   FunctionPassManager FPM(DebugLogging);
 
   // Form SSA out of local memory accesses after breaking apart aggregates into
@@ -401,7 +390,7 @@
   FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
 
   // Hoisting of scalars and load expressions.
-  if (Level > O1) {
+  if (Level.isO2Or3() || Level.isOptimizingForSize()) {
     if (EnableGVNHoist)
       FPM.addPass(GVNHoistPass());
@@ -413,31 +402,31 @@
   }
 
   // Speculative execution if the target has divergent branches; otherwise nop.
-  if (Level > O1) {
+  if (Level.isO2Or3() || Level.isOptimizingForSize()) {
     FPM.addPass(SpeculativeExecutionPass());
 
-    // Optimize based on known information about branches, and cleanup afterward.
+    // Optimize based on known information about branches, and cleanup
+    // afterward.
     FPM.addPass(JumpThreadingPass());
     FPM.addPass(CorrelatedValuePropagationPass());
   }
 
   FPM.addPass(SimplifyCFGPass());
-  if (Level == O3)
+  if (Level == OptimizationLevel::O3)
     FPM.addPass(AggressiveInstCombinePass());
   FPM.addPass(InstCombinePass());
 
-  if (!isOptimizingForSize(Level))
+  if (!Level.isOptimizingForSize())
     FPM.addPass(LibCallsShrinkWrapPass());
 
   invokePeepholeEPCallbacks(FPM, Level);
 
   // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
   // using the size value profile. Don't perform this when optimizing for size.
-  if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
-      !isOptimizingForSize(Level) && Level > O1)
+  if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse && Level.isO2Or3())
     FPM.addPass(PGOMemOPSizeOpt());
 
   // TODO: Investigate the cost/benefit of tail call elimination on debugging.
-  if (Level > O1)
+  if (Level.isO2Or3() || Level.isOptimizingForSize())
     FPM.addPass(TailCallElimPass());
   FPM.addPass(SimplifyCFGPass());
@@ -464,7 +453,7 @@
   LPM1.addPass(LoopSimplifyCFGPass());
 
   // Rotate Loop - disable header duplication at -Oz
-  LPM1.addPass(LoopRotatePass(Level != Oz));
+  LPM1.addPass(LoopRotatePass(Level != OptimizationLevel::Oz));
   // TODO: Investigate promotion cap for O1.
   LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
   LPM1.addPass(SimpleLoopUnswitchPass());
@@ -481,7 +470,8 @@
   if ((Phase != ThinLTOPhase::PreLink || !PGOOpt ||
        PGOOpt->Action != PGOOptions::SampleUse) &&
       PTO.LoopUnrolling)
-    LPM2.addPass(LoopFullUnrollPass(Level, /*OnlyWhenForced=*/false,
+    LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
+                                    /*OnlyWhenForced=*/false,
                                     PTO.ForgetAllSCEVInLoopUnroll));
 
   for (auto &C : LoopOptimizerEndEPCallbacks)
@@ -489,7 +479,8 @@
 
   // We provide the opt remark emitter pass for LICM to use. We only need to do
   // this once as it is immutable.
-  FPM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+  FPM.addPass(
+      RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
   FPM.addPass(createFunctionToLoopPassAdaptor(
       std::move(LPM1), EnableMSSALoopDependency, DebugLogging));
   FPM.addPass(SimplifyCFGPass());
@@ -504,7 +495,7 @@
   FPM.addPass(SROA());
 
   // Eliminate redundancies.
-  if (Level != O1) {
+  if (Level != OptimizationLevel::O1) {
     // These passes add substantial compile time so skip them at O1.
     FPM.addPass(MergedLoadStoreMotionPass());
     if (RunNewGVN)
@@ -533,7 +524,7 @@
 
   // Re-consider control flow based optimizations after redundancy elimination,
   // redo DCE, etc.
-  if (Level > O1) {
+  if (Level.isO2Or3() || Level.isOptimizingForSize()) {
     FPM.addPass(JumpThreadingPass());
     FPM.addPass(CorrelatedValuePropagationPass());
     FPM.addPass(DSEPass());
@@ -553,7 +544,7 @@
   FPM.addPass(InstCombinePass());
   invokePeepholeEPCallbacks(FPM, Level);
 
-  if (EnableCHR && Level == O3 && PGOOpt &&
+  if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt &&
       (PGOOpt->Action == PGOOptions::IRUse ||
        PGOOpt->Action == PGOOptions::SampleUse))
     FPM.addPass(ControlHeightReductionPass());
@@ -566,13 +557,13 @@
                                     bool RunProfileGen, bool IsCS,
                                     std::string ProfileFile,
                                     std::string ProfileRemappingFile) {
-  assert(Level != O0 && "Not expecting O0 here!");
+  assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
 
   // Generally running simplification passes and the inliner with an high
   // threshold results in smaller executables, but there may be cases where
   // the size grows, so let's be conservative here and skip this simplification
   // at -Os/Oz. We will not do this inline for context sensistive PGO (when
   // IsCS is true).
-  if (!isOptimizingForSize(Level) && !IsCS) {
+  if (!Level.isOptimizingForSize() && !IsCS) {
     InlineParams IP;
 
     // In the old pass manager, this is a cl::opt. Should still this be one?
@@ -658,16 +649,11 @@
 
 static InlineParams
 getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) {
-  auto O3 = PassBuilder::O3;
-  unsigned OptLevel = Level > O3 ? 2 : Level;
-  unsigned SizeLevel = Level > O3 ? Level - O3 : 0;
-  return getInlineParams(OptLevel, SizeLevel);
+  return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
 }
 
-ModulePassManager
-PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
-                                               ThinLTOPhase Phase,
-                                               bool DebugLogging) {
+ModulePassManager PassBuilder::buildModuleSimplificationPipeline(
+    OptimizationLevel Level, ThinLTOPhase Phase, bool DebugLogging) {
   ModulePassManager MPM(DebugLogging);
 
   bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
@@ -707,7 +693,7 @@
   EarlyFPM.addPass(SROA());
   EarlyFPM.addPass(EarlyCSEPass());
   EarlyFPM.addPass(LowerExpectIntrinsicPass());
-  if (Level == O3)
+  if (Level == OptimizationLevel::O3)
     EarlyFPM.addPass(CallSiteSplittingPass());
 
   // In SamplePGO ThinLTO backend, we need instcombine before profile annotation
@@ -826,7 +812,7 @@
 
   // When at O3 add argument promotion to the pass pipeline.
   // FIXME: It isn't at all clear why this should be limited to O3.
-  if (Level == O3)
+  if (Level == OptimizationLevel::O3)
     MainCGPipeline.addPass(ArgumentPromotionPass());
 
   // Lastly, add the core function simplification pipeline nested inside the
@@ -950,11 +936,11 @@
   // convert to more optimized IR using more aggressive simplify CFG options.
   // The extra sinking transform can create larger basic blocks, so do this
   // before SLP vectorization.
-  OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions().
-                                     forwardSwitchCondToPhi(true).
-                                     convertSwitchToLookupTable(true).
-                                     needCanonicalLoops(false).
-                                     sinkCommonInsts(true)));
+  OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
+                                         .forwardSwitchCondToPhi(true)
+                                         .convertSwitchToLookupTable(true)
+                                         .needCanonicalLoops(false)
+                                         .sinkCommonInsts(true)));
 
   // Optimize parallel scalar instruction chains into SIMD instructions.
   if (PTO.SLPVectorization)
@@ -970,14 +956,15 @@
   // across the loop nests.
   // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
   if (EnableUnrollAndJam && PTO.LoopUnrolling) {
-    OptimizePM.addPass(LoopUnrollAndJamPass(Level));
+    OptimizePM.addPass(LoopUnrollAndJamPass(Level.getSpeedupLevel()));
   }
-  OptimizePM.addPass(LoopUnrollPass(
-      LoopUnrollOptions(Level, /*OnlyWhenForced=*/!PTO.LoopUnrolling,
-                        PTO.ForgetAllSCEVInLoopUnroll)));
+  OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions(
+      Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
+      PTO.ForgetAllSCEVInLoopUnroll)));
   OptimizePM.addPass(WarnMissedTransformationsPass());
   OptimizePM.addPass(InstCombinePass());
-  OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+  OptimizePM.addPass(
+      RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
   OptimizePM.addPass(createFunctionToLoopPassAdaptor(
       LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
       EnableMSSALoopDependency, DebugLogging));
@@ -1036,7 +1023,8 @@
 ModulePassManager
 PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
                                            bool DebugLogging, bool LTOPreLink) {
-  assert(Level != O0 && "Must request optimizations for the default pipeline!");
+  assert(Level != OptimizationLevel::O0 &&
+         "Must request optimizations for the default pipeline!");
 
   ModulePassManager MPM(DebugLogging);
@@ -1063,7 +1051,8 @@
 ModulePassManager
 PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level,
                                                 bool DebugLogging) {
-  assert(Level != O0 && "Must request optimizations for the default pipeline!");
+  assert(Level != OptimizationLevel::O0 &&
+         "Must request optimizations for the default pipeline!");
 
   ModulePassManager MPM(DebugLogging);
@@ -1124,7 +1113,7 @@
     MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
   }
 
-  if (Level == O0)
+  if (Level == OptimizationLevel::O0)
     return MPM;
 
   // Force any function attributes we want the rest of the pipeline to observe.
@@ -1143,10 +1132,11 @@
 ModulePassManager
 PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level,
                                             bool DebugLogging) {
-  assert(Level != O0 && "Must request optimizations for the default pipeline!");
+  assert(Level != OptimizationLevel::O0 &&
+         "Must request optimizations for the default pipeline!");
   // FIXME: We should use a customized pre-link pipeline!
   return buildPerModuleDefaultPipeline(Level, DebugLogging,
-                                       /* LTOPreLink */true);
+                                       /* LTOPreLink */ true);
 }
 
 ModulePassManager
@@ -1154,7 +1144,7 @@
                                      ModuleSummaryIndex *ExportSummary) {
   ModulePassManager MPM(DebugLogging);
 
-  if (Level == O0) {
+  if (Level == OptimizationLevel::O0) {
     // The WPD and LowerTypeTest passes need to run at -O0 to lower type
     // metadata and intrinsics.
     MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
@@ -1183,7 +1173,7 @@
   // libraries and other oracles.
   MPM.addPass(InferFunctionAttrsPass());
 
-  if (Level > 1) {
+  if (Level.isO2Or3() || Level.isOptimizingForSize()) {
     FunctionPassManager EarlyFPM(DebugLogging);
     EarlyFPM.addPass(CallSiteSplittingPass());
     MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
@@ -1197,16 +1187,16 @@
     // Propagate constants at call sites into the functions they call. This
     // opens opportunities for globalopt (and inlining) by substituting function
     // pointers passed as arguments to direct uses of functions.
-    MPM.addPass(IPSCCPPass());
+    MPM.addPass(IPSCCPPass());
 
-    // Attach metadata to indirect call sites indicating the set of functions
-    // they may target at run-time. This should follow IPSCCP.
-    MPM.addPass(CalledValuePropagationPass());
+    // Attach metadata to indirect call sites indicating the set of functions
+    // they may target at run-time. This should follow IPSCCP.
+    MPM.addPass(CalledValuePropagationPass());
   }
 
   // Now deduce any function attributes based in the current code.
-  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
-      PostOrderFunctionAttrsPass()));
+  MPM.addPass(
+      createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
 
   // Do RPO function attribute inference across the module to forward-propagate
   // attributes where applicable.
@@ -1221,7 +1211,7 @@
   MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
 
   // Stop here at -O1.
-  if (Level == 1) {
+  if (Level == OptimizationLevel::O1) {
     // The LowerTypeTestsPass needs to run to lower type metadata and the
     // type.test intrinsics. The pass does nothing if CFI is disabled.
     MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
@@ -1246,7 +1236,7 @@
   // function pointers. When this happens, we often have to resolve varargs
   // calls, etc, so let instcombine do this.
   FunctionPassManager PeepholeFPM(DebugLogging);
-  if (Level == O3)
+  if (Level == OptimizationLevel::O3)
     PeepholeFPM.addPass(AggressiveInstCombinePass());
   PeepholeFPM.addPass(InstCombinePass());
   invokePeepholeEPCallbacks(PeepholeFPM, Level);
@@ -1298,8 +1288,8 @@
   // Run a few AA driver optimizations here and now to cleanup the code.
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
 
-  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
-      PostOrderFunctionAttrsPass()));
+  MPM.addPass(
+      createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
 
   // FIXME: here we run IP alias analysis in the legacy PM.
   FunctionPassManager MainFPM;
@@ -1559,7 +1549,8 @@
       return make_error<StringError>(
          formatv("invalid argument to SimplifyCFG pass bonus-threshold "
                  "parameter: '{0}' ",
-                 ParamName).str(),
+                 ParamName)
+             .str(),
          inconvertibleErrorCode());
     Result.bonusInstThreshold(BonusInstThreshold.getSExtValue());
   } else {
@@ -1882,13 +1873,13 @@
   assert(Matches.size() == 3 && "Must capture two matched strings!");
 
   OptimizationLevel L = StringSwitch<OptimizationLevel>(Matches[2])
-                            .Case("O0", O0)
-                            .Case("O1", O1)
-                            .Case("O2", O2)
-                            .Case("O3", O3)
-                            .Case("Os", Os)
-                            .Case("Oz", Oz);
-  if (L == O0) {
+                            .Case("O0", OptimizationLevel::O0)
+                            .Case("O1", OptimizationLevel::O1)
+                            .Case("O2", OptimizationLevel::O2)
+                            .Case("O3", OptimizationLevel::O3)
+                            .Case("Os", OptimizationLevel::Os)
+                            .Case("Oz", OptimizationLevel::Oz);
+  if (L == OptimizationLevel::O0) {
     // Add instrumentation PGO passes -- at O0 we can still do PGO.
     if (PGOOpt && Matches[1] != "thinlto" &&
         (PGOOpt->Action == PGOOptions::IRInstr ||
@@ -1905,8 +1896,8 @@
   // This is consistent with old pass manager invoked via opt, but
   // inconsistent with clang. Clang doesn't enable loop vectorization
   // but does enable slp vectorization at Oz.
-  PTO.LoopVectorization = L > O1 && L < Oz;
-  PTO.SLPVectorization = L > O1 && L < Oz;
+  PTO.LoopVectorization = L.isO2Or3() || L == OptimizationLevel::Os;
+  PTO.SLPVectorization = L.isO2Or3() || L == OptimizationLevel::Os;
 
   if (Matches[1] == "default") {
     MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging));
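
Not part of the patch itself: the snippet below is a minimal sketch of how client code can exercise the OptimizationLevel class introduced above. PassBuilder::OptimizationLevel and its accessors (isOptimizingForSize, getSpeedupLevel, getSizeLevel, operator==) come from the header change in this diff; the describeLevel helper and the printed strings are hypothetical and only illustrate the intended replacement for the old `Level > O1` / `isOptimizingForSize(Level)` enum comparisons.

// Illustrative only; assumes the PassBuilder.h change above has been applied.
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/raw_ostream.h"

using llvm::PassBuilder;

// Uses the new member predicates instead of comparing raw enum values.
static void describeLevel(PassBuilder::OptimizationLevel Level) {
  if (Level == PassBuilder::OptimizationLevel::O0) {
    llvm::outs() << "no optimization requested\n";
  } else if (Level.isOptimizingForSize()) {
    // Os maps to size level 1, Oz to size level 2.
    llvm::outs() << "size level " << Level.getSizeLevel() << "\n";
  } else {
    // O1/O2/O3 map to speedup levels 1/2/3.
    llvm::outs() << "speedup level " << Level.getSpeedupLevel() << "\n";
  }
}

int main() {
  describeLevel(PassBuilder::OptimizationLevel::O2); // "speedup level 2"
  describeLevel(PassBuilder::OptimizationLevel::Oz); // "size level 2"
  return 0;
}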