Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -259,7 +259,7 @@ // FunctionPass *createCFGSimplificationPass( unsigned Threshold = 1, bool ForwardSwitchCond = false, - bool ConvertSwitch = false, bool KeepLoops = true, + bool ConvertSwitch = false, bool KeepLoops = true, bool SinkCommon = true, std::function Ftor = nullptr); //===----------------------------------------------------------------------===// Index: include/llvm/Transforms/Scalar/SimplifyCFG.h =================================================================== --- include/llvm/Transforms/Scalar/SimplifyCFG.h +++ include/llvm/Transforms/Scalar/SimplifyCFG.h @@ -34,12 +34,15 @@ /// The default constructor sets the pass options to create canonical IR, /// rather than optimal IR. That is, by default we bypass transformations that /// are likely to improve performance but make analysis for other passes more - /// difficult. + /// difficult. Sinking common instructions is an exception to this rule - we + /// enable that transform by default because it should help in general, but we + /// disable it if even more basic transforms like early-cse have not run yet. SimplifyCFGPass() : SimplifyCFGPass(SimplifyCFGOptions() .forwardSwitchCondToPhi(false) .convertSwitchToLookupTable(false) - .needCanonicalLoops(true)) {} + .needCanonicalLoops(true) + .sinkCommonInsts(true)) {} /// Construct a pass with optional optimizations. Index: include/llvm/Transforms/Utils/Local.h =================================================================== --- include/llvm/Transforms/Utils/Local.h +++ include/llvm/Transforms/Utils/Local.h @@ -63,16 +63,20 @@ bool ForwardSwitchCondToPhi; bool ConvertSwitchToLookupTable; bool NeedCanonicalLoop; + bool SinkCommonInsts; AssumptionCache *AC; SimplifyCFGOptions(unsigned BonusThreshold = 1, bool ForwardSwitchCond = false, bool SwitchToLookup = false, bool CanonicalLoops = true, + bool SinkCommon = true, AssumptionCache *AssumpCache = nullptr) : BonusInstThreshold(BonusThreshold), ForwardSwitchCondToPhi(ForwardSwitchCond), ConvertSwitchToLookupTable(SwitchToLookup), - NeedCanonicalLoop(CanonicalLoops), AC(AssumpCache) {} + NeedCanonicalLoop(CanonicalLoops), + SinkCommonInsts(SinkCommon), + AC(AssumpCache) {} // Support 'builder' pattern to set members by name at construction time. SimplifyCFGOptions &bonusInstThreshold(int I) { @@ -91,6 +95,10 @@ NeedCanonicalLoop = B; return *this; } + SimplifyCFGOptions &sinkCommonInsts(bool B) { + SinkCommonInsts = B; + return *this; + } SimplifyCFGOptions &setAssumptionCache(AssumptionCache *Cache) { AC = Cache; return *this; Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -544,7 +544,10 @@ // Create an early function pass manager to cleanup the output of the // frontend. FunctionPassManager EarlyFPM(DebugLogging); - EarlyFPM.addPass(SimplifyCFGPass()); + // Do not sink common instructions and form selects before early-cse has had + // a chance to eliminate redundant operations. + EarlyFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions(). + sinkCommonInsts(false))); EarlyFPM.addPass(SROA()); EarlyFPM.addPass(EarlyCSEPass()); EarlyFPM.addPass(LowerExpectIntrinsicPass()); Index: lib/Target/ARM/ARMTargetMachine.cpp =================================================================== --- lib/Target/ARM/ARMTargetMachine.cpp +++ lib/Target/ARM/ARMTargetMachine.cpp @@ -385,7 +385,7 @@ // ldrex/strex loops to simplify this, but it needs tidying up. if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) addPass(createCFGSimplificationPass( - 1, false, false, true, [this](const Function &F) { + 1, false, false, true, true, [this](const Function &F) { const auto &ST = this->TM->getSubtarget(F); return ST.hasAnyDataBarrier() && !ST.isThumb1Only(); })); Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -250,7 +250,9 @@ addInitialAliasAnalysisPasses(FPM); - FPM.add(createCFGSimplificationPass()); + // Do not sink common instructions and form selects before early-cse has had + // a chance to eliminate redundant operations. + FPM.add(createCFGSimplificationPass(1, false, false, true, false)); FPM.add(createSROAPass()); FPM.add(createEarlyCSEPass()); FPM.add(createLowerExpectIntrinsicPass()); Index: lib/Transforms/Scalar/SimplifyCFGPass.cpp =================================================================== --- lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -61,6 +61,11 @@ "forward-switch-cond", cl::Hidden, cl::init(false), cl::desc("Forward switch condition to phi ops (default = false)")); +static cl::opt UserSinkCommonInsts( + "sink-common-insts", cl::Hidden, cl::init(true), + cl::desc("Sink common instructions (default = true)")); + + STATISTIC(NumSimpl, "Number of blocks simplified"); /// If we have more than one empty (other than phi node) return blocks, @@ -205,6 +210,9 @@ Options.NeedCanonicalLoop = UserKeepLoops.getNumOccurrences() ? UserKeepLoops : Opts.NeedCanonicalLoop; + Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences() + ? UserSinkCommonInsts + : Opts.SinkCommonInsts; } PreservedAnalyses SimplifyCFGPass::run(Function &F, @@ -226,6 +234,7 @@ CFGSimplifyPass(unsigned Threshold = 1, bool ForwardSwitchCond = false, bool ConvertSwitch = false, bool KeepLoops = true, + bool SinkCommon = true, std::function Ftor = nullptr) : FunctionPass(ID), PredicateFtor(std::move(Ftor)) { @@ -246,6 +255,10 @@ Options.NeedCanonicalLoop = UserKeepLoops.getNumOccurrences() ? UserKeepLoops : KeepLoops; + + Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences() + ? UserSinkCommonInsts + : SinkCommon; } bool runOnFunction(Function &F) override { @@ -276,7 +289,8 @@ FunctionPass * llvm::createCFGSimplificationPass(unsigned Threshold, bool ForwardSwitchCond, bool ConvertSwitch, bool KeepLoops, + bool SinkCommon, std::function Ftor) { return new CFGSimplifyPass(Threshold, ForwardSwitchCond, ConvertSwitch, - KeepLoops, std::move(Ftor)); + KeepLoops, SinkCommon, std::move(Ftor)); } Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -5710,7 +5710,7 @@ BasicBlock *BB = BI->getParent(); BasicBlock *Succ = BI->getSuccessor(0); - if (SinkCommon && SinkThenElseCodeToEnd(BI)) + if (SinkCommon && Options.SinkCommonInsts && SinkThenElseCodeToEnd(BI)) return true; // If the Terminator is the only non-phi instruction, simplify the block. Index: test/Transforms/PhaseOrdering/simplifycfg-options.ll =================================================================== --- test/Transforms/PhaseOrdering/simplifycfg-options.ll +++ test/Transforms/PhaseOrdering/simplifycfg-options.ll @@ -76,10 +76,8 @@ ; ALL-NEXT: [[XI:%.*]] = load double, double* [[XI_PTR]], align 8 ; ALL-NEXT: [[YI:%.*]] = load double, double* [[YI_PTR]], align 8 ; ALL-NEXT: [[CMP:%.*]] = fcmp ogt double [[XI]], [[YI]] -; ALL-NEXT: [[Y_SINK:%.*]] = select i1 [[CMP]], double* [[X]], double* [[Y]] -; ALL-NEXT: [[YI_PTR_AGAIN:%.*]] = getelementptr double, double* [[Y_SINK]], i64 [[I]] -; ALL-NEXT: [[YI_AGAIN:%.*]] = load double, double* [[YI_PTR_AGAIN]], align 8 -; ALL-NEXT: ret double [[YI_AGAIN]] +; ALL-NEXT: [[XI_YI:%.*]] = select i1 [[CMP]], double [[XI]], double [[YI]] +; ALL-NEXT: ret double [[XI_YI]] ; entry: %xi_ptr = getelementptr double, double* %x, i64 %i