Index: include/llvm-c/Transforms/Scalar.h =================================================================== --- include/llvm-c/Transforms/Scalar.h +++ include/llvm-c/Transforms/Scalar.h @@ -44,9 +44,6 @@ /** See llvm::createCFGSimplificationPass function. */ void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM); -/** See llvm::createLateCFGSimplificationPass function. */ -void LLVMAddLateCFGSimplificationPass(LLVMPassManagerRef PM); - /** See llvm::createDeadStoreEliminationPass function. */ void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM); Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -173,7 +173,6 @@ void initializeJumpThreadingPass(PassRegistry&); void initializeLCSSAVerificationPassPass(PassRegistry&); void initializeLCSSAWrapperPassPass(PassRegistry&); -void initializeLateCFGSimplifyPassPass(PassRegistry&); void initializeLazyBlockFrequencyInfoPassPass(PassRegistry&); void initializeLazyBranchProbabilityInfoPassPass(PassRegistry&); void initializeLazyMachineBlockFrequencyInfoPassPass(PassRegistry&); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -75,7 +75,6 @@ (void) llvm::createCallGraphDOTPrinterPass(); (void) llvm::createCallGraphViewerPass(); (void) llvm::createCFGSimplificationPass(); - (void) llvm::createLateCFGSimplificationPass(); (void) llvm::createCFLAndersAAWrapperPass(); (void) llvm::createCFLSteensAAWrapperPass(); (void) llvm::createStructurizeCFGPass(); Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -255,18 +255,12 @@ //===----------------------------------------------------------------------===// // // CFGSimplification - Merge basic blocks, eliminate unreachable blocks, -// simplify terminator instructions, etc... +// simplify terminator instructions, convert switches to lookup tables, etc. // FunctionPass *createCFGSimplificationPass( - int Threshold = -1, std::function Ftor = nullptr); - -//===----------------------------------------------------------------------===// -// -// LateCFGSimplification - Like CFGSimplification, but may also -// convert switches to lookup tables. -// -FunctionPass *createLateCFGSimplificationPass( - int Threshold = -1, std::function Ftor = nullptr); + unsigned Threshold = 1, bool ForwardSwitchCond = false, + bool ConvertSwitch = false, bool KeepLoops = true, + std::function Ftor = nullptr); //===----------------------------------------------------------------------===// // Index: include/llvm/Transforms/Scalar/SimplifyCFG.h =================================================================== --- include/llvm/Transforms/Scalar/SimplifyCFG.h +++ include/llvm/Transforms/Scalar/SimplifyCFG.h @@ -31,11 +31,13 @@ SimplifyCFGOptions Options; public: - /// Construct a pass with default options. - SimplifyCFGPass(); + // TODO: Add default constructor with default options when we're confident + // that we're creating the intended functionality at all points in the + // optimization pipeline. /// Construct a pass with optional optimizations. - SimplifyCFGPass(const SimplifyCFGOptions &PassOptions); + SimplifyCFGPass(const SimplifyCFGOptions &PassOptions) + : Options(PassOptions) {} /// \brief Run the pass over the function. PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); Index: include/llvm/Transforms/Utils/Local.h =================================================================== --- include/llvm/Transforms/Utils/Local.h +++ include/llvm/Transforms/Utils/Local.h @@ -65,7 +65,7 @@ bool NeedCanonicalLoop; AssumptionCache *AC; - SimplifyCFGOptions(int BonusThreshold = 1, bool ForwardSwitchCond = false, + SimplifyCFGOptions(unsigned BonusThreshold = 1, bool ForwardSwitchCond = false, bool SwitchToLookup = false, bool CanonicalLoops = true, AssumptionCache *AssumpCache = nullptr) : BonusInstThreshold(BonusThreshold), Index: lib/LTO/LTOCodeGenerator.cpp =================================================================== --- lib/LTO/LTOCodeGenerator.cpp +++ lib/LTO/LTOCodeGenerator.cpp @@ -131,7 +131,6 @@ initializeMemCpyOptLegacyPassPass(R); initializeDCELegacyPassPass(R); initializeCFGSimplifyPassPass(R); - initializeLateCFGSimplifyPassPass(R); } void LTOCodeGenerator::setAsmUndefinedRefs(LTOModule *Mod) { Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -345,7 +345,7 @@ // Global value numbering based sinking. if (EnableGVNSink) { FPM.addPass(GVNSinkPass()); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass(SimplifyCFGPass({1, true, true, false})); } // Speculative execution if the target has divergent branches; otherwise nop. @@ -354,7 +354,7 @@ // Optimize based on known information about branches, and cleanup afterward. FPM.addPass(JumpThreadingPass()); FPM.addPass(CorrelatedValuePropagationPass()); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass(SimplifyCFGPass({1, true, true, false})); FPM.addPass(InstCombinePass()); if (!isOptimizingForSize(Level)) @@ -363,7 +363,7 @@ invokePeepholeEPCallbacks(FPM, Level); FPM.addPass(TailCallElimPass()); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass(SimplifyCFGPass({1, true, true, false})); // Form canonically associated expression trees, and simplify the trees using // basic mathematical properties. For example, this will form (nearly) @@ -404,7 +404,7 @@ // this once as it is immutable. FPM.addPass(RequireAnalysisPass()); FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1))); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass(SimplifyCFGPass({1, true, true, false})); FPM.addPass(InstCombinePass()); FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2))); @@ -449,7 +449,7 @@ // Finally, do an expensive DCE pass to catch all the dead code exposed by // the simplifications and basic cleanup after all the simplifications. FPM.addPass(ADCEPass()); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass(SimplifyCFGPass({1, true, true, false})); FPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(FPM, Level); @@ -483,7 +483,7 @@ FunctionPassManager FPM; FPM.addPass(SROA()); FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies. - FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks. + FPM.addPass(SimplifyCFGPass({1, true, true, false})); // Merge & remove basic blocks. FPM.addPass(InstCombinePass()); // Combine silly sequences. invokePeepholeEPCallbacks(FPM, Level); @@ -537,7 +537,7 @@ // Create an early function pass manager to cleanup the output of the // frontend. FunctionPassManager EarlyFPM(DebugLogging); - EarlyFPM.addPass(SimplifyCFGPass()); + EarlyFPM.addPass(SimplifyCFGPass({1, true, true, false})); EarlyFPM.addPass(SROA()); EarlyFPM.addPass(EarlyCSEPass()); EarlyFPM.addPass(LowerExpectIntrinsicPass()); @@ -594,7 +594,7 @@ GlobalCleanupPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(GlobalCleanupPM, Level); - GlobalCleanupPM.addPass(SimplifyCFGPass()); + GlobalCleanupPM.addPass(SimplifyCFGPass({1, true, true, false})); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM))); // Add all the requested passes for instrumentation PGO, if requested. @@ -741,7 +741,7 @@ OptimizePM.addPass(SLPVectorizerPass()); // Cleanup after all of the vectorizers. - OptimizePM.addPass(SimplifyCFGPass()); + OptimizePM.addPass(SimplifyCFGPass({1, true, true, false})); OptimizePM.addPass(InstCombinePass()); // Unroll small loops to hide loop backedge latency and saturate any parallel @@ -775,7 +775,7 @@ // LoopSink (and other loop passes since the last simplifyCFG) might have // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. - OptimizePM.addPass(SimplifyCFGPass()); + OptimizePM.addPass(SimplifyCFGPass({1, true, true, false})); // Add the core optimizing pipeline. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM))); @@ -1014,7 +1014,7 @@ // are sorted out. MainFPM.addPass(InstCombinePass()); - MainFPM.addPass(SimplifyCFGPass()); + MainFPM.addPass(SimplifyCFGPass({1, true, true, false})); MainFPM.addPass(SCCPPass()); MainFPM.addPass(InstCombinePass()); MainFPM.addPass(BDCEPass()); @@ -1051,7 +1051,8 @@ // Add late LTO optimization passes. // Delete basic blocks, which optimization passes may have killed. - MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass())); + MPM.addPass(createModuleToFunctionPassAdaptor( + SimplifyCFGPass({1, true, true, false}))); // Drop bodies of available eternally objects to improve GlobalDCE. MPM.addPass(EliminateAvailableExternallyPass()); Index: lib/Passes/PassRegistry.def =================================================================== --- lib/Passes/PassRegistry.def +++ lib/Passes/PassRegistry.def @@ -190,7 +190,7 @@ FUNCTION_PASS("print", ScalarEvolutionPrinterPass(dbgs())) FUNCTION_PASS("reassociate", ReassociatePass()) FUNCTION_PASS("sccp", SCCPPass()) -FUNCTION_PASS("simplify-cfg", SimplifyCFGPass()) +FUNCTION_PASS("simplify-cfg", SimplifyCFGPass({1, false, false, true})) FUNCTION_PASS("sink", SinkingPass()) FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass()) FUNCTION_PASS("speculative-execution", SpeculativeExecutionPass()) Index: lib/Target/AArch64/AArch64TargetMachine.cpp =================================================================== --- lib/Target/AArch64/AArch64TargetMachine.cpp +++ lib/Target/AArch64/AArch64TargetMachine.cpp @@ -365,7 +365,7 @@ // determine whether it succeeded. We can exploit existing control-flow in // ldrex/strex loops to simplify this, but it needs tidying up. if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) - addPass(createLateCFGSimplificationPass()); + addPass(createCFGSimplificationPass(1, true, true, false)); // Run LoopDataPrefetch // Index: lib/Target/ARM/ARMTargetMachine.cpp =================================================================== --- lib/Target/ARM/ARMTargetMachine.cpp +++ lib/Target/ARM/ARMTargetMachine.cpp @@ -384,10 +384,11 @@ // determine whether it succeeded. We can exploit existing control-flow in // ldrex/strex loops to simplify this, but it needs tidying up. if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) - addPass(createCFGSimplificationPass(-1, [this](const Function &F) { - const auto &ST = this->TM->getSubtarget(F); - return ST.hasAnyDataBarrier() && !ST.isThumb1Only(); - })); + addPass(createCFGSimplificationPass( + 1, false, false, true, [this](const Function &F) { + const auto &ST = this->TM->getSubtarget(F); + return ST.hasAnyDataBarrier() && !ST.isThumb1Only(); + })); TargetPassConfig::addIRPasses(); Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -250,7 +250,7 @@ addInitialAliasAnalysisPasses(FPM); - FPM.add(createCFGSimplificationPass()); + FPM.add(createCFGSimplificationPass(1, false, false, true)); FPM.add(createSROAPass()); FPM.add(createEarlyCSEPass()); FPM.add(createLowerExpectIntrinsicPass()); @@ -277,7 +277,7 @@ MPM.add(createFunctionInliningPass(IP)); MPM.add(createSROAPass()); MPM.add(createEarlyCSEPass()); // Catch trivial redundancies - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createCFGSimplificationPass(1, false, false, true)); MPM.add(createInstructionCombiningPass()); // Combine silly seq's addExtensionsToPM(EP_Peephole, MPM); } @@ -310,14 +310,14 @@ MPM.add(createGVNHoistPass()); if (EnableGVNSink) { MPM.add(createGVNSinkPass()); - MPM.add(createCFGSimplificationPass()); + MPM.add(createCFGSimplificationPass(1, false, false, true)); } // Speculative execution if the target has divergent branches; otherwise nop. MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); MPM.add(createJumpThreadingPass()); // Thread jumps. MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createCFGSimplificationPass(1, false, false, true)); // Combine silly seq's addInstructionCombiningPass(MPM); if (SizeLevel == 0 && !DisableLibCallsShrinkWrap) @@ -329,7 +329,7 @@ MPM.add(createPGOMemOPSizeOptLegacyPass()); MPM.add(createTailCallEliminationPass()); // Eliminate tail calls - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createCFGSimplificationPass(1, false, false, true)); MPM.add(createReassociatePass()); // Reassociate expressions // Rotate Loop - disable header duplication at -Oz MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); @@ -338,7 +338,7 @@ MPM.add(createSimpleLoopUnswitchLegacyPass()); else MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); - MPM.add(createCFGSimplificationPass()); + MPM.add(createCFGSimplificationPass(1, false, false, true)); addInstructionCombiningPass(MPM); MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. @@ -347,7 +347,7 @@ if (EnableLoopInterchange) { MPM.add(createLoopInterchangePass()); // Interchange loops - MPM.add(createCFGSimplificationPass()); + MPM.add(createCFGSimplificationPass(1, false, false, true)); } if (!DisableUnrollLoops) MPM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops @@ -383,7 +383,7 @@ MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. MPM.add(createAggressiveDCEPass()); // Delete dead instructions - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createCFGSimplificationPass(1, false, false, true)); // Clean up after everything. addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); @@ -466,9 +466,10 @@ MPM.add(createDeadArgEliminationPass()); // Dead argument elimination - addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE + // Clean up after IPCP & DAE. + addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); - MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + MPM.add(createCFGSimplificationPass(1, false, false, true)); // For SamplePGO in ThinLTO compile phase, we do not want to do indirect // call promotion as it will change the CFG too much to make the 2nd @@ -612,7 +613,7 @@ addInstructionCombiningPass(MPM); MPM.add(createLICMPass()); MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); - MPM.add(createCFGSimplificationPass()); + MPM.add(createCFGSimplificationPass(1, false, false, true)); addInstructionCombiningPass(MPM); } @@ -624,7 +625,9 @@ } addExtensionsToPM(EP_Peephole, MPM); - MPM.add(createLateCFGSimplificationPass()); // Switches to lookup tables + // Switches to lookup tables and other transforms that may not be considered + // canonical by other IR passes. + MPM.add(createCFGSimplificationPass(1, true, true, false)); addInstructionCombiningPass(MPM); if (!DisableUnrollLoops) { @@ -672,7 +675,7 @@ // LoopSink (and other loop passes since the last simplifyCFG) might have // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. - MPM.add(createCFGSimplificationPass()); + MPM.add(createCFGSimplificationPass(1, false, false, true)); addExtensionsToPM(EP_OptimizerLast, MPM); } @@ -797,7 +800,7 @@ // we may have exposed more scalar opportunities. Run parts of the scalar // optimizer again at this point. addInstructionCombiningPass(PM); // Initial cleanup - PM.add(createCFGSimplificationPass()); // if-convert + PM.add(createCFGSimplificationPass(1, false, false, true)); // if-convert PM.add(createSCCPPass()); // Propagate exposed constants addInstructionCombiningPass(PM); // Clean up again PM.add(createBitTrackingDCEPass()); @@ -821,7 +824,7 @@ void PassManagerBuilder::addLateLTOOptimizationPasses( legacy::PassManagerBase &PM) { // Delete basic blocks, which optimization passes may have killed. - PM.add(createCFGSimplificationPass()); + PM.add(createCFGSimplificationPass(1, false, false, true)); // Drop bodies of available externally objects to improve GlobalDCE. PM.add(createEliminateAvailableExternallyPass()); Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -85,7 +85,6 @@ initializeIPSCCPLegacyPassPass(Registry); initializeSROALegacyPassPass(Registry); initializeCFGSimplifyPassPass(Registry); - initializeLateCFGSimplifyPassPass(Registry); initializeStructurizeCFGPass(Registry); initializeSimpleLoopUnswitchLegacyPassPass(Registry); initializeSinkingLegacyPassPass(Registry); @@ -119,11 +118,7 @@ } void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createCFGSimplificationPass()); -} - -void LLVMAddLateCFGSimplificationPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createLateCFGSimplificationPass()); + unwrap(PM)->add(createCFGSimplificationPass(1, false, false, true)); } void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) { Index: lib/Transforms/Scalar/SimplifyCFGPass.cpp =================================================================== --- lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -45,9 +45,21 @@ #define DEBUG_TYPE "simplifycfg" -static cl::opt -UserBonusInstThreshold("bonus-inst-threshold", cl::Hidden, cl::init(1), - cl::desc("Control the number of bonus instructions (default = 1)")); +static cl::opt UserBonusInstThreshold( + "bonus-inst-threshold", cl::Hidden, cl::init(1), + cl::desc("Control the number of bonus instructions (default = 1)")); + +static cl::opt UserKeepLoops( + "keep-loops", cl::Hidden, cl::init(true), + cl::desc("Preserve canonical loop structure (default = true)")); + +static cl::opt UserSwitchToLookup( + "switch-to-lookup", cl::Hidden, cl::init(false), + cl::desc("Convert switches to lookup tables (default = false)")); + +static cl::opt UserForwardSwitchCond( + "forward-switch-cond", cl::Hidden, cl::init(false), + cl::desc("Forward switch condition to phi ops (default = false)")); STATISTIC(NumSimpl, "Number of blocks simplified"); @@ -179,14 +191,6 @@ return true; } -// FIXME: The new pass manager always creates a "late" simplifycfg pass using -// this default constructor. -SimplifyCFGPass::SimplifyCFGPass() - : Options(UserBonusInstThreshold, true, true, false) {} - -SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &PassOptions) - : Options(PassOptions) {} - PreservedAnalyses SimplifyCFGPass::run(Function &F, FunctionAnalysisManager &AM) { auto &TTI = AM.getResult(F); @@ -199,62 +203,49 @@ } namespace { -struct BaseCFGSimplifyPass : public FunctionPass { +struct CFGSimplifyPass : public FunctionPass { + static char ID; + SimplifyCFGOptions Options; std::function PredicateFtor; - int BonusInstThreshold; - bool ForwardSwitchCondToPhi; - bool ConvertSwitchToLookupTable; - bool KeepCanonicalLoops; - - BaseCFGSimplifyPass(int T, bool ForwardSwitchCond, bool ConvertSwitch, - bool KeepLoops, - std::function Ftor, char &ID) - : FunctionPass(ID), PredicateFtor(std::move(Ftor)), - ForwardSwitchCondToPhi(ForwardSwitchCond), - ConvertSwitchToLookupTable(ConvertSwitch), - KeepCanonicalLoops(KeepLoops) { - BonusInstThreshold = (T == -1) ? UserBonusInstThreshold : T; + + CFGSimplifyPass(unsigned Threshold = 1, bool ForwardSwitchCond = false, + bool ConvertSwitch = false, bool KeepLoops = true, + std::function Ftor = nullptr) + : FunctionPass(ID), PredicateFtor(std::move(Ftor)) { + + initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry()); + + // Check for command-line overrides of options for debug/customization. + Options.BonusInstThreshold = UserBonusInstThreshold.getNumOccurrences() + ? UserBonusInstThreshold + : Threshold; + + Options.ForwardSwitchCondToPhi = UserForwardSwitchCond.getNumOccurrences() + ? UserForwardSwitchCond + : ForwardSwitchCond; + + Options.ConvertSwitchToLookupTable = UserSwitchToLookup.getNumOccurrences() + ? UserSwitchToLookup + : ConvertSwitch; + + Options.NeedCanonicalLoop = + UserKeepLoops.getNumOccurrences() ? UserKeepLoops : KeepLoops; } + bool runOnFunction(Function &F) override { if (skipFunction(F) || (PredicateFtor && !PredicateFtor(F))) return false; - AssumptionCache *AC = - &getAnalysis().getAssumptionCache(F); - const TargetTransformInfo &TTI = - getAnalysis().getTTI(F); - return simplifyFunctionCFG(F, TTI, - {BonusInstThreshold, ForwardSwitchCondToPhi, - ConvertSwitchToLookupTable, KeepCanonicalLoops, - AC}); + Options.AC = &getAnalysis().getAssumptionCache(F); + auto &TTI = getAnalysis().getTTI(F); + return simplifyFunctionCFG(F, TTI, Options); } - void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); AU.addPreserved(); } }; - -struct CFGSimplifyPass : public BaseCFGSimplifyPass { - static char ID; // Pass identification, replacement for typeid - - CFGSimplifyPass(int T = -1, - std::function Ftor = nullptr) - : BaseCFGSimplifyPass(T, false, false, true, Ftor, ID) { - initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry()); - } -}; - -struct LateCFGSimplifyPass : public BaseCFGSimplifyPass { - static char ID; // Pass identification, replacement for typeid - - LateCFGSimplifyPass(int T = -1, - std::function Ftor = nullptr) - : BaseCFGSimplifyPass(T, true, true, false, Ftor, ID) { - initializeLateCFGSimplifyPassPass(*PassRegistry::getPassRegistry()); - } -}; } char CFGSimplifyPass::ID = 0; @@ -265,24 +256,11 @@ INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false, false) -char LateCFGSimplifyPass::ID = 0; -INITIALIZE_PASS_BEGIN(LateCFGSimplifyPass, "latesimplifycfg", - "Simplify the CFG more aggressively", false, false) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_END(LateCFGSimplifyPass, "latesimplifycfg", - "Simplify the CFG more aggressively", false, false) - // Public interface to the CFGSimplification pass FunctionPass * -llvm::createCFGSimplificationPass(int Threshold, - std::function Ftor) { - return new CFGSimplifyPass(Threshold, std::move(Ftor)); -} - -// Public interface to the LateCFGSimplification pass -FunctionPass * -llvm::createLateCFGSimplificationPass(int Threshold, +llvm::createCFGSimplificationPass(unsigned Threshold, bool ForwardSwitchCond, + bool ConvertSwitch, bool KeepLoops, std::function Ftor) { - return new LateCFGSimplifyPass(Threshold, std::move(Ftor)); + return new CFGSimplifyPass(Threshold, ForwardSwitchCond, ConvertSwitch, + KeepLoops, std::move(Ftor)); } Index: test/CodeGen/AArch64/cmpxchg-idioms.ll =================================================================== --- test/CodeGen/AArch64/cmpxchg-idioms.ll +++ test/CodeGen/AArch64/cmpxchg-idioms.ll @@ -107,7 +107,7 @@ ; CHECK: [[FAILED]]: ; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}} -; verify the preheader is simplified by latesimplifycfg. +; verify the preheader is simplified by simplifycfg. ; CHECK: [[PH]]: ; CHECK: orr w22, wzr, #0x2 ; CHECK-NOT: orr w22, wzr, #0x4 Index: test/Transforms/LoopVectorize/X86/float-induction-x86.ll =================================================================== --- test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -O3 -latesimplifycfg -mcpu=core-avx2 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefix AUTO_VEC %s +; RUN: opt < %s -O3 -simplifycfg -keep-loops=false -mcpu=core-avx2 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefix AUTO_VEC %s ; This test checks auto-vectorization with FP induction variable. ; The FP operation is not "fast" and requires "fast-math" function attribute. Index: test/Transforms/LoopVectorize/float-induction.ll =================================================================== --- test/Transforms/LoopVectorize/float-induction.ll +++ test/Transforms/LoopVectorize/float-induction.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL1 %s ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL2 %s ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -dce -instcombine -S | FileCheck --check-prefix VEC1_INTERL2 %s -; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -dce -simplifycfg -instcombine -latesimplifycfg -S | FileCheck --check-prefix VEC2_INTERL1_PRED_STORE %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -dce -simplifycfg -instcombine -simplifycfg -keep-loops=false -S | FileCheck --check-prefix VEC2_INTERL1_PRED_STORE %s @fp_inc = common global float 0.000000e+00, align 4 Index: test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll =================================================================== --- test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll +++ test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll @@ -1,8 +1,8 @@ -; RUN: opt -S -latesimplifycfg -mtriple=arm -relocation-model=static < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE -; RUN: opt -S -latesimplifycfg -mtriple=arm -relocation-model=pic < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE -; RUN: opt -S -latesimplifycfg -mtriple=arm -relocation-model=ropi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE -; RUN: opt -S -latesimplifycfg -mtriple=arm -relocation-model=rwpi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE -; RUN: opt -S -latesimplifycfg -mtriple=arm -relocation-model=ropi-rwpi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE +; RUN: opt -S -simplifycfg -switch-to-lookup -mtriple=arm -relocation-model=static < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE +; RUN: opt -S -simplifycfg -switch-to-lookup -mtriple=arm -relocation-model=pic < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE +; RUN: opt -S -simplifycfg -switch-to-lookup -mtriple=arm -relocation-model=ropi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE +; RUN: opt -S -simplifycfg -switch-to-lookup -mtriple=arm -relocation-model=rwpi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE +; RUN: opt -S -simplifycfg -switch-to-lookup -mtriple=arm -relocation-model=ropi-rwpi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE ; CHECK: @{{.*}} = private unnamed_addr constant [3 x i32] [i32 1234, i32 5678, i32 15532] ; ENABLE: @{{.*}} = private unnamed_addr constant [3 x i32*] [i32* @c1, i32* @c2, i32* @c3] Index: test/Transforms/SimplifyCFG/CoveredLookupTable.ll =================================================================== --- test/Transforms/SimplifyCFG/CoveredLookupTable.ll +++ test/Transforms/SimplifyCFG/CoveredLookupTable.ll @@ -1,4 +1,4 @@ -; RUN: opt -latesimplifycfg -S %s | FileCheck %s +; RUN: opt -simplifycfg -switch-to-lookup -S %s | FileCheck %s ; rdar://15268442 target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" Index: test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll =================================================================== --- test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll +++ test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -latesimplifycfg -S | FileCheck %s +; RUN: opt < %s -simplifycfg -forward-switch-cond=false -S | FileCheck %s --check-prefix=NO_FWD +; RUN: opt < %s -simplifycfg -forward-switch-cond=true -S | FileCheck %s --check-prefix=FWD ; PR10131 @@ -7,11 +8,31 @@ target triple = "i386-pc-linux-gnu" define i32 @t(i32 %m) nounwind readnone { -; CHECK-LABEL: @t( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SWITCH:%.*]] = icmp ult i32 [[M:%.*]], 4 -; CHECK-NEXT: [[M_:%.*]] = select i1 [[SWITCH]], i32 [[M]], i32 4 -; CHECK-NEXT: ret i32 [[M_]] +; NO_FWD-LABEL: @t( +; NO_FWD-NEXT: entry: +; NO_FWD-NEXT: switch i32 [[M:%.*]], label [[SW_BB4:%.*]] [ +; NO_FWD-NEXT: i32 0, label [[RETURN:%.*]] +; NO_FWD-NEXT: i32 1, label [[SW_BB1:%.*]] +; NO_FWD-NEXT: i32 2, label [[SW_BB2:%.*]] +; NO_FWD-NEXT: i32 3, label [[SW_BB3:%.*]] +; NO_FWD-NEXT: ] +; NO_FWD: sw.bb1: +; NO_FWD-NEXT: br label [[RETURN]] +; NO_FWD: sw.bb2: +; NO_FWD-NEXT: br label [[RETURN]] +; NO_FWD: sw.bb3: +; NO_FWD-NEXT: br label [[RETURN]] +; NO_FWD: sw.bb4: +; NO_FWD-NEXT: br label [[RETURN]] +; NO_FWD: return: +; NO_FWD-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 4, [[SW_BB4]] ], [ 3, [[SW_BB3]] ], [ 2, [[SW_BB2]] ], [ 1, [[SW_BB1]] ], [ 0, [[ENTRY:%.*]] ] +; NO_FWD-NEXT: ret i32 [[RETVAL_0]] +; +; FWD-LABEL: @t( +; FWD-NEXT: entry: +; FWD-NEXT: [[SWITCH:%.*]] = icmp ult i32 [[M:%.*]], 4 +; FWD-NEXT: [[M_:%.*]] = select i1 [[SWITCH]], i32 [[M]], i32 4 +; FWD-NEXT: ret i32 [[M_]] ; entry: switch i32 %m, label %sw.bb4 [ @@ -46,18 +67,35 @@ ; This then subsequently should allow squashing of the other trivial case blocks. define i32 @PR34471(i32 %x) { -; CHECK-LABEL: @PR34471( -; CHECK-NEXT: entry: -; CHECK-NEXT: switch i32 [[X:%.*]], label [[ELSE3:%.*]] [ -; CHECK-NEXT: i32 17, label [[RETURN:%.*]] -; CHECK-NEXT: i32 19, label [[RETURN]] -; CHECK-NEXT: i32 42, label [[RETURN]] -; CHECK-NEXT: ] -; CHECK: else3: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: return: -; CHECK-NEXT: [[R:%.*]] = phi i32 [ 0, [[ELSE3]] ], [ [[X]], [[ENTRY:%.*]] ], [ [[X]], [[ENTRY]] ], [ [[X]], [[ENTRY]] ] -; CHECK-NEXT: ret i32 [[R]] +; NO_FWD-LABEL: @PR34471( +; NO_FWD-NEXT: entry: +; NO_FWD-NEXT: switch i32 [[X:%.*]], label [[ELSE3:%.*]] [ +; NO_FWD-NEXT: i32 17, label [[RETURN:%.*]] +; NO_FWD-NEXT: i32 19, label [[IF19:%.*]] +; NO_FWD-NEXT: i32 42, label [[IF42:%.*]] +; NO_FWD-NEXT: ] +; NO_FWD: if19: +; NO_FWD-NEXT: br label [[RETURN]] +; NO_FWD: if42: +; NO_FWD-NEXT: br label [[RETURN]] +; NO_FWD: else3: +; NO_FWD-NEXT: br label [[RETURN]] +; NO_FWD: return: +; NO_FWD-NEXT: [[R:%.*]] = phi i32 [ [[X]], [[IF19]] ], [ [[X]], [[IF42]] ], [ 0, [[ELSE3]] ], [ 17, [[ENTRY:%.*]] ] +; NO_FWD-NEXT: ret i32 [[R]] +; +; FWD-LABEL: @PR34471( +; FWD-NEXT: entry: +; FWD-NEXT: switch i32 [[X:%.*]], label [[ELSE3:%.*]] [ +; FWD-NEXT: i32 17, label [[RETURN:%.*]] +; FWD-NEXT: i32 19, label [[RETURN]] +; FWD-NEXT: i32 42, label [[RETURN]] +; FWD-NEXT: ] +; FWD: else3: +; FWD-NEXT: br label [[RETURN]] +; FWD: return: +; FWD-NEXT: [[R:%.*]] = phi i32 [ 0, [[ELSE3]] ], [ [[X]], [[ENTRY:%.*]] ], [ [[X]], [[ENTRY]] ], [ [[X]], [[ENTRY]] ] +; FWD-NEXT: ret i32 [[R]] ; entry: switch i32 %x, label %else3 [ Index: test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll =================================================================== --- test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll +++ test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll @@ -1,6 +1,7 @@ -; RUN: opt < %s -latesimplifycfg -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -; In the presence of "-no-jump-tables"="true", late simplifycfg should not -; convert any switch cases to lookup tables. +; RUN: opt < %s -simplifycfg -switch-to-lookup -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +; In the presence of "-no-jump-tables"="true", simplifycfg should not convert switches to lookup tables. + ; CHECK: @switch.table.bar = private unnamed_addr constant [4 x i32] [i32 55, i32 123, i32 0, i32 -1] ; CHECK-LABEL: foo ; CHECK-NOT: @switch.table.foo = private unnamed_addr constant [4 x i32] [i32 55, i32 123, i32 0, i32 -1] Index: test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll =================================================================== --- test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll +++ test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -latesimplifycfg < %s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s +; RUN: opt -S -simplifycfg -switch-to-lookup < %s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s ; rdar://17887153 target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin12.0.0" Index: test/Transforms/SimplifyCFG/X86/switch-table-bug.ll =================================================================== --- test/Transforms/SimplifyCFG/X86/switch-table-bug.ll +++ test/Transforms/SimplifyCFG/X86/switch-table-bug.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -latesimplifycfg < %s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s +; RUN: opt -S -simplifycfg -switch-to-lookup < %s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s ; rdar://17735071 target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin12.0.0" Index: test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll =================================================================== --- test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll +++ test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -latesimplifycfg -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s +; RUN: opt < %s -simplifycfg -switch-to-lookup=true -keep-loops=false -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: test/Transforms/SimplifyCFG/multiple-phis.ll =================================================================== --- test/Transforms/SimplifyCFG/multiple-phis.ll +++ test/Transforms/SimplifyCFG/multiple-phis.ll @@ -1,4 +1,4 @@ -; RUN: opt -latesimplifycfg -S < %s | FileCheck %s +; RUN: opt -simplifycfg -keep-loops=false -S < %s | FileCheck %s ; It's not worthwhile to if-convert one of the phi nodes and leave ; the other behind, because that still requires a branch. If Index: test/Transforms/SimplifyCFG/preserve-llvm-loop-metadata.ll =================================================================== --- test/Transforms/SimplifyCFG/preserve-llvm-loop-metadata.ll +++ test/Transforms/SimplifyCFG/preserve-llvm-loop-metadata.ll @@ -1,4 +1,4 @@ -; RUN: opt -latesimplifycfg -S < %s | FileCheck %s +; RUN: opt -simplifycfg -keep-loops=false -S < %s | FileCheck %s define void @test1(i32 %n) #0 { entry: Index: test/Transforms/SimplifyCFG/rangereduce.ll =================================================================== --- test/Transforms/SimplifyCFG/rangereduce.ll +++ test/Transforms/SimplifyCFG/rangereduce.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -latesimplifycfg -S | FileCheck %s +; RUN: opt < %s -simplifycfg -switch-to-lookup -S | FileCheck %s target datalayout = "e-n32"