Index: llvm/trunk/include/llvm-c/Transforms/Scalar.h =================================================================== --- llvm/trunk/include/llvm-c/Transforms/Scalar.h +++ llvm/trunk/include/llvm-c/Transforms/Scalar.h @@ -44,6 +44,9 @@ /** See llvm::createCFGSimplificationPass function. */ void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM); +/** See llvm::createLateCFGSimplificationPass function. */ +void LLVMAddLateCFGSimplificationPass(LLVMPassManagerRef PM); + /** See llvm::createDeadStoreEliminationPass function. */ void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM); Index: llvm/trunk/include/llvm/InitializePasses.h =================================================================== --- llvm/trunk/include/llvm/InitializePasses.h +++ llvm/trunk/include/llvm/InitializePasses.h @@ -86,6 +86,7 @@ void initializeCFGOnlyViewerLegacyPassPass(PassRegistry&); void initializeCFGPrinterLegacyPassPass(PassRegistry&); void initializeCFGSimplifyPassPass(PassRegistry&); +void initializeLateCFGSimplifyPassPass(PassRegistry&); void initializeCFGViewerLegacyPassPass(PassRegistry&); void initializeCFLAndersAAWrapperPassPass(PassRegistry&); void initializeCFLSteensAAWrapperPassPass(PassRegistry&); Index: llvm/trunk/include/llvm/LinkAllPasses.h =================================================================== --- llvm/trunk/include/llvm/LinkAllPasses.h +++ llvm/trunk/include/llvm/LinkAllPasses.h @@ -75,6 +75,7 @@ (void) llvm::createCallGraphDOTPrinterPass(); (void) llvm::createCallGraphViewerPass(); (void) llvm::createCFGSimplificationPass(); + (void) llvm::createLateCFGSimplificationPass(); (void) llvm::createCFLAndersAAWrapperPass(); (void) llvm::createCFLSteensAAWrapperPass(); (void) llvm::createStructurizeCFGPass(); Index: llvm/trunk/include/llvm/Transforms/Scalar.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar.h +++ llvm/trunk/include/llvm/Transforms/Scalar.h @@ -262,6 +262,14 @@ 
//===----------------------------------------------------------------------===// // +// LateCFGSimplification - Like CFGSimplification, but may also +// convert switches to lookup tables. +// +FunctionPass *createLateCFGSimplificationPass( + int Threshold = -1, std::function Ftor = nullptr); + +//===----------------------------------------------------------------------===// +// // FlattenCFG - flatten CFG, reduce number of conditional branches by using // parallel-and and parallel-or mode, etc... // Index: llvm/trunk/include/llvm/Transforms/Scalar/SimplifyCFG.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar/SimplifyCFG.h +++ llvm/trunk/include/llvm/Transforms/Scalar/SimplifyCFG.h @@ -27,13 +27,16 @@ /// by the rest of the mid-level optimizer. class SimplifyCFGPass : public PassInfoMixin { int BonusInstThreshold; + bool LateSimplifyCFG; public: - /// \brief Construct a pass with the default thresholds. + /// \brief Construct a pass with the default thresholds + /// and switch optimizations. SimplifyCFGPass(); - /// \brief Construct a pass with a specific bonus threshold. - SimplifyCFGPass(int BonusInstThreshold); + /// \brief Construct a pass with a specific bonus threshold + /// and optional switch optimizations. + SimplifyCFGPass(int BonusInstThreshold, bool LateSimplifyCFG); /// \brief Run the pass over the function. PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); Index: llvm/trunk/include/llvm/Transforms/Utils/Local.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Utils/Local.h +++ llvm/trunk/include/llvm/Transforms/Utils/Local.h @@ -142,7 +142,8 @@ /// eliminate. 
bool SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, unsigned BonusInstThreshold, AssumptionCache *AC = nullptr, - SmallPtrSetImpl *LoopHeaders = nullptr); + SmallPtrSetImpl *LoopHeaders = nullptr, + bool LateSimplifyCFG = false); /// This function is used to flatten a CFG. For example, it uses parallel-and /// and parallel-or mode to collapse if-conditions and merge if-regions with Index: llvm/trunk/lib/LTO/LTOCodeGenerator.cpp =================================================================== --- llvm/trunk/lib/LTO/LTOCodeGenerator.cpp +++ llvm/trunk/lib/LTO/LTOCodeGenerator.cpp @@ -141,6 +141,7 @@ initializeMemCpyOptLegacyPassPass(R); initializeDCELegacyPassPass(R); initializeCFGSimplifyPassPass(R); + initializeLateCFGSimplifyPassPass(R); } void LTOCodeGenerator::setAsmUndefinedRefs(LTOModule *Mod) { Index: llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp +++ llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -631,7 +631,7 @@ } addExtensionsToPM(EP_Peephole, MPM); - MPM.add(createCFGSimplificationPass()); + MPM.add(createLateCFGSimplificationPass()); // Switches to lookup tables addInstructionCombiningPass(MPM); if (!DisableUnrollLoops) { Index: llvm/trunk/lib/Transforms/Scalar/Scalar.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/Scalar.cpp +++ llvm/trunk/lib/Transforms/Scalar/Scalar.cpp @@ -81,6 +81,7 @@ initializeIPSCCPLegacyPassPass(Registry); initializeSROALegacyPassPass(Registry); initializeCFGSimplifyPassPass(Registry); + initializeLateCFGSimplifyPassPass(Registry); initializeStructurizeCFGPass(Registry); initializeSinkingLegacyPassPass(Registry); initializeTailCallElimPass(Registry); @@ -117,6 +118,10 @@ unwrap(PM)->add(createCFGSimplificationPass()); } +void LLVMAddLateCFGSimplificationPass(LLVMPassManagerRef PM) { + 
unwrap(PM)->add(createLateCFGSimplificationPass()); +} + void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createDeadStoreEliminationPass()); } Index: llvm/trunk/lib/Transforms/Scalar/SimplifyCFGPass.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ llvm/trunk/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -130,7 +130,8 @@ /// iterating until no more changes are made. static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, AssumptionCache *AC, - unsigned BonusInstThreshold) { + unsigned BonusInstThreshold, + bool LateSimplifyCFG) { bool Changed = false; bool LocalChange = true; @@ -145,7 +146,7 @@ // Loop over all of the basic blocks and remove them if they are unneeded. for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { - if (SimplifyCFG(&*BBIt++, TTI, BonusInstThreshold, AC, &LoopHeaders)) { + if (SimplifyCFG(&*BBIt++, TTI, BonusInstThreshold, AC, &LoopHeaders, LateSimplifyCFG)) { LocalChange = true; ++NumSimpl; } @@ -156,10 +157,12 @@ } static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI, - AssumptionCache *AC, int BonusInstThreshold) { + AssumptionCache *AC, int BonusInstThreshold, + bool LateSimplifyCFG) { bool EverChanged = removeUnreachableBlocks(F); EverChanged |= mergeEmptyReturnBlocks(F); - EverChanged |= iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold); + EverChanged |= iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold, + LateSimplifyCFG); // If neither pass changed anything, we're done. 
if (!EverChanged) return false; @@ -173,7 +176,8 @@ return true; do { - EverChanged = iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold); + EverChanged = iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold, + LateSimplifyCFG); EverChanged |= removeUnreachableBlocks(F); } while (EverChanged); @@ -181,17 +185,19 @@ } SimplifyCFGPass::SimplifyCFGPass() - : BonusInstThreshold(UserBonusInstThreshold) {} + : BonusInstThreshold(UserBonusInstThreshold), + LateSimplifyCFG(true) {} -SimplifyCFGPass::SimplifyCFGPass(int BonusInstThreshold) - : BonusInstThreshold(BonusInstThreshold) {} +SimplifyCFGPass::SimplifyCFGPass(int BonusInstThreshold, bool LateSimplifyCFG) + : BonusInstThreshold(BonusInstThreshold), + LateSimplifyCFG(LateSimplifyCFG) {} PreservedAnalyses SimplifyCFGPass::run(Function &F, FunctionAnalysisManager &AM) { auto &TTI = AM.getResult(F); auto &AC = AM.getResult(F); - if (!simplifyFunctionCFG(F, TTI, &AC, BonusInstThreshold)) + if (!simplifyFunctionCFG(F, TTI, &AC, BonusInstThreshold, LateSimplifyCFG)) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve(); @@ -199,16 +205,17 @@ } namespace { -struct CFGSimplifyPass : public FunctionPass { - static char ID; // Pass identification, replacement for typeid +struct BaseCFGSimplifyPass : public FunctionPass { unsigned BonusInstThreshold; std::function PredicateFtor; + bool LateSimplifyCFG; - CFGSimplifyPass(int T = -1, - std::function Ftor = nullptr) - : FunctionPass(ID), PredicateFtor(std::move(Ftor)) { + BaseCFGSimplifyPass(int T, bool LateSimplifyCFG, + std::function Ftor, + char &ID) + : FunctionPass(ID), PredicateFtor(std::move(Ftor)), + LateSimplifyCFG(LateSimplifyCFG) { BonusInstThreshold = (T == -1) ? 
UserBonusInstThreshold : unsigned(T); - initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override { if (skipFunction(F) || (PredicateFtor && !PredicateFtor(F))) @@ -218,7 +225,7 @@ &getAnalysis().getAssumptionCache(F); const TargetTransformInfo &TTI = getAnalysis().getTTI(F); - return simplifyFunctionCFG(F, TTI, AC, BonusInstThreshold); + return simplifyFunctionCFG(F, TTI, AC, BonusInstThreshold, LateSimplifyCFG); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -227,6 +234,26 @@ AU.addPreserved(); } }; + +struct CFGSimplifyPass : public BaseCFGSimplifyPass { + static char ID; // Pass identification, replacement for typeid + + CFGSimplifyPass(int T = -1, + std::function Ftor = nullptr) + : BaseCFGSimplifyPass(T, false, Ftor, ID) { + initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry()); + } +}; + +struct LateCFGSimplifyPass : public BaseCFGSimplifyPass { + static char ID; // Pass identification, replacement for typeid + + LateCFGSimplifyPass(int T = -1, + std::function Ftor = nullptr) + : BaseCFGSimplifyPass(T, true, Ftor, ID) { + initializeLateCFGSimplifyPassPass(*PassRegistry::getPassRegistry()); + } +}; } char CFGSimplifyPass::ID = 0; @@ -237,9 +264,24 @@ INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false, false) +char LateCFGSimplifyPass::ID = 0; +INITIALIZE_PASS_BEGIN(LateCFGSimplifyPass, "latesimplifycfg", + "Simplify the CFG more aggressively", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_END(LateCFGSimplifyPass, "latesimplifycfg", + "Simplify the CFG more aggressively", false, false) + // Public interface to the CFGSimplification pass FunctionPass * llvm::createCFGSimplificationPass(int Threshold, - std::function Ftor) { + std::function Ftor) { return new CFGSimplifyPass(Threshold, std::move(Ftor)); } + +// Public interface to the LateCFGSimplification pass 
+FunctionPass * +llvm::createLateCFGSimplificationPass(int Threshold, + std::function Ftor) { + return new LateCFGSimplifyPass(Threshold, std::move(Ftor)); +} Index: llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp +++ llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp @@ -170,6 +170,8 @@ unsigned BonusInstThreshold; AssumptionCache *AC; SmallPtrSetImpl *LoopHeaders; + // See comments in SimplifyCFGOpt::SimplifySwitch. + bool LateSimplifyCFG; Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases( TerminatorInst *TI, std::vector &Cases); @@ -193,9 +195,10 @@ public: SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL, unsigned BonusInstThreshold, AssumptionCache *AC, - SmallPtrSetImpl *LoopHeaders) + SmallPtrSetImpl *LoopHeaders, + bool LateSimplifyCFG) : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC), - LoopHeaders(LoopHeaders) {} + LoopHeaders(LoopHeaders), LateSimplifyCFG(LateSimplifyCFG) {} bool run(BasicBlock *BB); }; @@ -5562,7 +5565,12 @@ if (ForwardSwitchConditionToPHI(SI)) return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; - if (SwitchToLookupTable(SI, Builder, DL, TTI)) + // The conversion from switch to lookup tables results in difficult + // to analyze code and makes pruning branches much harder. + // This is a problem if the switch expression itself can still be + // restricted as a result of inlining or CVP. Therefore, only apply this + // transformation during late steps of the optimisation chain. 
+ if (LateSimplifyCFG && SwitchToLookupTable(SI, Builder, DL, TTI)) return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; if (ReduceSwitchRange(SI, Builder, DL, TTI)) @@ -6021,8 +6029,9 @@ /// bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, unsigned BonusInstThreshold, AssumptionCache *AC, - SmallPtrSetImpl *LoopHeaders) { + SmallPtrSetImpl *LoopHeaders, + bool LateSimplifyCFG) { return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), - BonusInstThreshold, AC, LoopHeaders) + BonusInstThreshold, AC, LoopHeaders, LateSimplifyCFG) .run(BB); } Index: llvm/trunk/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll =================================================================== --- llvm/trunk/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll +++ llvm/trunk/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll @@ -1,8 +1,8 @@ -; RUN: opt -S -simplifycfg -mtriple=arm -relocation-model=static < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE -; RUN: opt -S -simplifycfg -mtriple=arm -relocation-model=pic < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE -; RUN: opt -S -simplifycfg -mtriple=arm -relocation-model=ropi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE -; RUN: opt -S -simplifycfg -mtriple=arm -relocation-model=rwpi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE -; RUN: opt -S -simplifycfg -mtriple=arm -relocation-model=ropi-rwpi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE +; RUN: opt -S -latesimplifycfg -mtriple=arm -relocation-model=static < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE +; RUN: opt -S -latesimplifycfg -mtriple=arm -relocation-model=pic < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE +; RUN: opt -S -latesimplifycfg -mtriple=arm -relocation-model=ropi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE +; RUN: opt -S -latesimplifycfg -mtriple=arm -relocation-model=rwpi < %s | 
FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE +; RUN: opt -S -latesimplifycfg -mtriple=arm -relocation-model=ropi-rwpi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE ; CHECK: @{{.*}} = private unnamed_addr constant [3 x i32] [i32 1234, i32 5678, i32 15532] ; ENABLE: @{{.*}} = private unnamed_addr constant [3 x i32*] [i32* @c1, i32* @c2, i32* @c3] Index: llvm/trunk/test/Transforms/SimplifyCFG/CoveredLookupTable.ll =================================================================== --- llvm/trunk/test/Transforms/SimplifyCFG/CoveredLookupTable.ll +++ llvm/trunk/test/Transforms/SimplifyCFG/CoveredLookupTable.ll @@ -1,4 +1,4 @@ -; RUN: opt -simplifycfg -S %s | FileCheck %s +; RUN: opt -latesimplifycfg -S %s | FileCheck %s ; rdar://15268442 target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" Index: llvm/trunk/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll =================================================================== --- llvm/trunk/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll +++ llvm/trunk/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -simplifycfg < %s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s +; RUN: opt -S -latesimplifycfg < %s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s ; rdar://17887153 target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin12.0.0" Index: llvm/trunk/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll =================================================================== --- llvm/trunk/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll +++ llvm/trunk/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -simplifycfg < 
%s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s +; RUN: opt -S -latesimplifycfg < %s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s ; rdar://17735071 target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin12.0.0" Index: llvm/trunk/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll =================================================================== --- llvm/trunk/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll +++ llvm/trunk/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -simplifycfg -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s +; RUN: opt < %s -latesimplifycfg -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -1178,8 +1178,9 @@ ret i32 %retval.0 ; CHECK-LABEL: @reuse_cmp2( ; CHECK: entry: -; CHECK-NEXT: %switch.tableidx = sub i32 %x, 0 -; CHECK-NEXT: [[C:%.+]] = icmp ult i32 %switch.tableidx, 4 +; CHECK-NEXT: %switch = icmp ult i32 %x, 4 +; CHECK-NEXT: %x. = select i1 %switch, i32 %x, i32 4 +; CHECK-NEXT: [[C:%.+]] = icmp ne i32 %x., 4 ; CHECK: [[R:%.+]] = select i1 [[C]], i32 {{.*}}, i32 100 ; CHECK-NEXT: ret i32 [[R]] } Index: llvm/trunk/test/Transforms/SimplifyCFG/rangereduce.ll =================================================================== --- llvm/trunk/test/Transforms/SimplifyCFG/rangereduce.ll +++ llvm/trunk/test/Transforms/SimplifyCFG/rangereduce.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -simplifycfg -S | FileCheck %s +; RUN: opt < %s -latesimplifycfg -S | FileCheck %s target datalayout = "e-n32"