Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -252,8 +252,9 @@ // simplify terminator instructions, convert switches to lookup tables, etc. // FunctionPass *createCFGSimplificationPass( - unsigned Threshold = 1, bool ForwardSwitchCond = false, - bool ConvertSwitch = false, bool KeepLoops = true, bool SinkCommon = false, + unsigned Threshold = 1, bool FoldPhiToSelect = true, + bool ForwardSwitchCond = false, bool ConvertSwitch = false, + bool KeepLoops = true, bool SinkCommon = false, std::function<bool(const Function &)> Ftor = nullptr); //===----------------------------------------------------------------------===// Index: include/llvm/Transforms/Utils/Local.h =================================================================== --- include/llvm/Transforms/Utils/Local.h +++ include/llvm/Transforms/Utils/Local.h @@ -63,6 +63,7 @@ /// replaced by lookup tables and selects. struct SimplifyCFGOptions { int BonusInstThreshold; + bool FoldPhiToSelect; bool ForwardSwitchCondToPhi; bool ConvertSwitchToLookupTable; bool NeedCanonicalLoop; @@ -70,11 +71,13 @@ AssumptionCache *AC; SimplifyCFGOptions(unsigned BonusThreshold = 1, + bool FoldPhiToSelect = true, bool ForwardSwitchCond = false, bool SwitchToLookup = false, bool CanonicalLoops = true, bool SinkCommon = false, AssumptionCache *AssumpCache = nullptr) : BonusInstThreshold(BonusThreshold), + FoldPhiToSelect(FoldPhiToSelect), ForwardSwitchCondToPhi(ForwardSwitchCond), ConvertSwitchToLookupTable(SwitchToLookup), NeedCanonicalLoop(CanonicalLoops), @@ -86,6 +89,10 @@ BonusInstThreshold = I; return *this; } + SimplifyCFGOptions &foldPhiToSelect(bool B) { + FoldPhiToSelect = B; + return *this; + } SimplifyCFGOptions &forwardSwitchCondToPhi(bool B) { ForwardSwitchCondToPhi = B; return *this; Index: lib/Passes/PassBuilder.cpp =================================================================== --- 
lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -348,6 +348,10 @@ assert(Level != O0 && "Must request optimizations!"); FunctionPassManager FPM(DebugLogging); + // During function simplification we want to avoid turning phi into select. + SimplifyCFGOptions SCO; + SCO.foldPhiToSelect(false); + // Form SSA out of local memory accesses after breaking apart aggregates into // scalars. FPM.addPass(SROA()); @@ -362,7 +366,7 @@ // Global value numbering based sinking. if (EnableGVNSink) { FPM.addPass(GVNSinkPass()); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass(SimplifyCFGPass(SCO)); } // Speculative execution if the target has divergent branches; otherwise nop. @@ -371,7 +375,7 @@ // Optimize based on known information about branches, and cleanup afterward. FPM.addPass(JumpThreadingPass()); FPM.addPass(CorrelatedValuePropagationPass()); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass(SimplifyCFGPass(SCO)); if (Level == O3) FPM.addPass(AggressiveInstCombinePass()); FPM.addPass(InstCombinePass()); @@ -388,7 +392,7 @@ FPM.addPass(PGOMemOPSizeOpt()); FPM.addPass(TailCallElimPass()); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass(SimplifyCFGPass(SCO)); // Form canonically associated expression trees, and simplify the trees using // basic mathematical properties. For example, this will form (nearly) @@ -437,7 +441,7 @@ // this once as it is immutable. FPM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), DebugLogging)); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass(SimplifyCFGPass(SCO)); FPM.addPass(InstCombinePass()); FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), DebugLogging)); @@ -482,7 +486,7 @@ // Finally, do an expensive DCE pass to catch all the dead code exposed by // the simplifications and basic cleanup after all the simplifications. 
FPM.addPass(ADCEPass()); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass(SimplifyCFGPass(SCO)); FPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(FPM, Level); @@ -571,7 +575,8 @@ // Create an early function pass manager to cleanup the output of the // frontend. FunctionPassManager EarlyFPM(DebugLogging); - EarlyFPM.addPass(SimplifyCFGPass()); + EarlyFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions(). + foldPhiToSelect(false))); EarlyFPM.addPass(SROA()); EarlyFPM.addPass(EarlyCSEPass()); EarlyFPM.addPass(LowerExpectIntrinsicPass()); @@ -705,6 +710,14 @@ createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass( std::move(MainCGPipeline), MaxDevirtIterations))); + // Now that all of the CGSCC passes are done, cfg simplify to convert phi to + // select then clean up the result. + FunctionPassManager LateFPM(DebugLogging); + LateFPM.addPass(SimplifyCFGPass()); + LateFPM.addPass(InstCombinePass()); + LateFPM.addPass(EarlyCSEPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM))); + return MPM; } Index: lib/Target/AArch64/AArch64TargetMachine.cpp =================================================================== --- lib/Target/AArch64/AArch64TargetMachine.cpp +++ lib/Target/AArch64/AArch64TargetMachine.cpp @@ -377,7 +377,7 @@ // determine whether it succeeded. We can exploit existing control-flow in // ldrex/strex loops to simplify this, but it needs tidying up. if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) - addPass(createCFGSimplificationPass(1, true, true, false, true)); + addPass(createCFGSimplificationPass(1, true, true, true, false, true)); // Run LoopDataPrefetch // Index: lib/Target/ARM/ARMTargetMachine.cpp =================================================================== --- lib/Target/ARM/ARMTargetMachine.cpp +++ lib/Target/ARM/ARMTargetMachine.cpp @@ -392,7 +392,7 @@ // ldrex/strex loops to simplify this, but it needs tidying up. 
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) addPass(createCFGSimplificationPass( - 1, false, false, true, true, [this](const Function &F) { + 1, true, false, false, true, true, [this](const Function &F) { const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F); return ST.hasAnyDataBarrier() && !ST.isThumb1Only(); })); Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -257,7 +257,7 @@ addInitialAliasAnalysisPasses(FPM); - FPM.add(createCFGSimplificationPass()); + FPM.add(createCFGSimplificationPass(1, false)); FPM.add(createSROAPass()); FPM.add(createEarlyCSEPass()); FPM.add(createLowerExpectIntrinsicPass()); @@ -317,14 +317,14 @@ MPM.add(createGVNHoistPass()); if (EnableGVNSink) { MPM.add(createGVNSinkPass()); - MPM.add(createCFGSimplificationPass()); + MPM.add(createCFGSimplificationPass(1, false)); } // Speculative execution if the target has divergent branches; otherwise nop. MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); MPM.add(createJumpThreadingPass()); // Thread jumps. MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createCFGSimplificationPass(1, false)); // Merge & remove BBs // Combine silly seq's if (OptLevel > 2) MPM.add(createAggressiveInstCombinerPass()); @@ -338,7 +338,7 @@ MPM.add(createPGOMemOPSizeOptLegacyPass()); MPM.add(createTailCallEliminationPass()); // Eliminate tail calls - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createCFGSimplificationPass(1, false)); // Merge & remove BBs MPM.add(createReassociatePass()); // Reassociate expressions // Begin the loop pass pipeline. @@ -359,7 +359,7 @@ // FIXME: We break the loop pass pipeline here in order to do full // simplify-cfg. 
Eventually loop-simplifycfg should be enhanced to replace the // need for this. - MPM.add(createCFGSimplificationPass()); + MPM.add(createCFGSimplificationPass(1, false)); addInstructionCombiningPass(MPM); // We resume loop passes creating a second loop pipeline here. MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars @@ -370,7 +370,7 @@ if (EnableLoopInterchange) { // FIXME: These are function passes and break the loop pass pipeline. MPM.add(createLoopInterchangePass()); // Interchange loops - MPM.add(createCFGSimplificationPass()); + MPM.add(createCFGSimplificationPass(1, false)); } if (!DisableUnrollLoops) MPM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops @@ -407,7 +407,7 @@ MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. MPM.add(createAggressiveDCEPass()); // Delete dead instructions - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createCFGSimplificationPass(1, false)); // Merge & remove BBs // Clean up after everything. addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); @@ -504,7 +504,7 @@ addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE addExtensionsToPM(EP_Peephole, MPM); - MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + MPM.add(createCFGSimplificationPass(1, false)); // Clean up after IPCP & DAE // For SamplePGO in ThinLTO compile phase, we do not want to do indirect // call promotion as it will change the CFG too much to make the 2nd @@ -540,6 +540,12 @@ // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); + // Now that all of the CGSCC passes are done, cfg simplify to convert phi to + // select then clean up the result. 
+ MPM.add(createCFGSimplificationPass()); + addInstructionCombiningPass(MPM); + MPM.add(createEarlyCSEPass()); + if (RunPartialInlining) MPM.add(createPartialInliningPass()); @@ -660,7 +666,7 @@ // convert to more optimized IR using more aggressive simplify CFG options. // The extra sinking transform can create larger basic blocks, so do this // before SLP vectorization. - MPM.add(createCFGSimplificationPass(1, true, true, false, true)); + MPM.add(createCFGSimplificationPass(1, true, true, true, false, true)); if (RunSLPAfterLoopVectorization && SLPVectorize) { MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -127,7 +127,7 @@ } void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createCFGSimplificationPass(1, false, false, true)); + unwrap(PM)->add(createCFGSimplificationPass()); } void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) { Index: lib/Transforms/Scalar/SimplifyCFGPass.cpp =================================================================== --- lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -49,6 +49,10 @@ "bonus-inst-threshold", cl::Hidden, cl::init(1), cl::desc("Control the number of bonus instructions (default = 1)")); +static cl::opt<bool> UserFoldPhiToSelect( + "fold-phi-to-select", cl::Hidden, cl::init(true), + cl::desc("Fold two-entry phi nodes into select (default = true)")); + static cl::opt<bool> UserKeepLoops( "keep-loops", cl::Hidden, cl::init(true), cl::desc("Preserve canonical loop structure (default = true)")); @@ -65,7 +69,6 @@ "sink-common-insts", cl::Hidden, cl::init(false), cl::desc("Sink common instructions (default = false)")); - STATISTIC(NumSimpl, "Number of blocks simplified"); /// If we have more than one empty (other than phi node) return blocks, @@ -201,6 
+204,9 @@ Options.BonusInstThreshold = UserBonusInstThreshold.getNumOccurrences() ? UserBonusInstThreshold : Opts.BonusInstThreshold; + Options.FoldPhiToSelect = UserFoldPhiToSelect.getNumOccurrences() + ? UserFoldPhiToSelect + : Opts.FoldPhiToSelect; Options.ForwardSwitchCondToPhi = UserForwardSwitchCond.getNumOccurrences() ? UserForwardSwitchCond : Opts.ForwardSwitchCondToPhi; @@ -232,9 +238,9 @@ SimplifyCFGOptions Options; std::function<bool(const Function &)> PredicateFtor; - CFGSimplifyPass(unsigned Threshold = 1, bool ForwardSwitchCond = false, - bool ConvertSwitch = false, bool KeepLoops = true, - bool SinkCommon = false, + CFGSimplifyPass(unsigned Threshold = 1, bool FoldPhiToSelect = true, + bool ForwardSwitchCond = false, bool ConvertSwitch = false, + bool KeepLoops = true, bool SinkCommon = false, std::function<bool(const Function &)> Ftor = nullptr) : FunctionPass(ID), PredicateFtor(std::move(Ftor)) { @@ -245,6 +251,10 @@ ? UserBonusInstThreshold : Threshold; + Options.FoldPhiToSelect = UserFoldPhiToSelect.getNumOccurrences() + ? UserFoldPhiToSelect + : FoldPhiToSelect; + Options.ForwardSwitchCondToPhi = UserForwardSwitchCond.getNumOccurrences() ? 
UserForwardSwitchCond : ForwardSwitchCond; @@ -287,10 +297,11 @@ // Public interface to the CFGSimplification pass FunctionPass * -llvm::createCFGSimplificationPass(unsigned Threshold, bool ForwardSwitchCond, - bool ConvertSwitch, bool KeepLoops, - bool SinkCommon, +llvm::createCFGSimplificationPass(unsigned Threshold, bool FoldPhiToSelect, + bool ForwardSwitchCond, bool ConvertSwitch, + bool KeepLoops, bool SinkCommon, std::function<bool(const Function &)> Ftor) { - return new CFGSimplifyPass(Threshold, ForwardSwitchCond, ConvertSwitch, - KeepLoops, SinkCommon, std::move(Ftor)); + return new CFGSimplifyPass(Threshold, FoldPhiToSelect, ForwardSwitchCond, + ConvertSwitch, KeepLoops, SinkCommon, + std::move(Ftor)); } Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -6032,9 +6032,10 @@ // If there is a trivial two-entry PHI node in this basic block, and we can // eliminate it, do so now. - if (auto *PN = dyn_cast<PHINode>(BB->begin())) - if (PN->getNumIncomingValues() == 2) - Changed |= FoldTwoEntryPHINode(PN, TTI, DL); + if (Options.FoldPhiToSelect) + if (auto *PN = dyn_cast<PHINode>(BB->begin())) + if (PN->getNumIncomingValues() == 2) + Changed |= FoldTwoEntryPHINode(PN, TTI, DL); Builder.SetInsertPoint(BB->getTerminator()); if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator())) { Index: test/Other/new-pm-defaults.ll =================================================================== --- test/Other/new-pm-defaults.ll +++ test/Other/new-pm-defaults.ll @@ -208,6 +208,12 @@ ; CHECK-O-NEXT: Finished llvm::Function pass manager run. ; CHECK-EP-CGSCC-LATE-NEXT: Running pass: NoOpCGSCCPass ; CHECK-O-NEXT: Finished CGSCC pass manager run. +; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> +; CHECK-O-NEXT: Starting llvm::Function pass manager run. 
+; CHECK-O-NEXT: Running pass: SimplifyCFGPass on foo +; CHECK-O-NEXT: Running pass: InstCombinePass on foo +; CHECK-O-NEXT: Running pass: EarlyCSEPass on foo +; CHECK-O-NEXT: Finished llvm::Function pass manager run. ; CHECK-O-NEXT: Finished llvm::Module pass manager run. ; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> ; CHECK-O-NEXT: Starting llvm::Module pass manager run. Index: test/Other/new-pm-thinlto-defaults.ll =================================================================== --- test/Other/new-pm-thinlto-defaults.ll +++ test/Other/new-pm-thinlto-defaults.ll @@ -186,6 +186,12 @@ ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. ; CHECK-O-NEXT: Finished CGSCC pass manager run. +; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> +; CHECK-O-NEXT: Starting llvm::Function pass manager run. +; CHECK-O-NEXT: Running pass: SimplifyCFGPass on foo +; CHECK-O-NEXT: Running pass: InstCombinePass on foo +; CHECK-O-NEXT: Running pass: EarlyCSEPass on foo +; CHECK-O-NEXT: Finished llvm::Function pass manager run. ; CHECK-O-NEXT: Finished llvm::Module pass manager run. 
; CHECK-PRELINK-O-NEXT: Running pass: GlobalOptPass ; CHECK-POSTLINK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> Index: test/Other/opt-O2-pipeline.ll =================================================================== --- test/Other/opt-O2-pipeline.ll +++ test/Other/opt-O2-pipeline.ll @@ -173,6 +173,17 @@ ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions ; CHECK-NEXT: A No-Op Barrier Pass +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Simplify the CFG +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Early CSE ; CHECK-NEXT: Eliminate Available Externally Globals ; CHECK-NEXT: CallGraph Construction ; CHECK-NEXT: Deduce function attributes in RPO Index: test/Other/opt-O3-pipeline.ll =================================================================== --- test/Other/opt-O3-pipeline.ll +++ test/Other/opt-O3-pipeline.ll @@ -177,6 +177,17 @@ ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions ; CHECK-NEXT: A No-Op Barrier Pass +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Simplify the CFG +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Early CSE ; CHECK-NEXT: Eliminate Available Externally Globals ; CHECK-NEXT: CallGraph Construction ; CHECK-NEXT: Deduce function attributes in RPO Index: test/Other/opt-Os-pipeline.ll 
=================================================================== --- test/Other/opt-Os-pipeline.ll +++ test/Other/opt-Os-pipeline.ll @@ -160,6 +160,17 @@ ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions ; CHECK-NEXT: A No-Op Barrier Pass +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Simplify the CFG +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Early CSE ; CHECK-NEXT: Eliminate Available Externally Globals ; CHECK-NEXT: CallGraph Construction ; CHECK-NEXT: Deduce function attributes in RPO Index: test/Other/pass-pipelines.ll =================================================================== --- test/Other/pass-pipelines.ll +++ test/Other/pass-pipelines.ll @@ -55,6 +55,11 @@ ; Next we break out of the main Function passes inside the CGSCC pipeline with ; a barrier pass. ; CHECK-O2: A No-Op Barrier Pass +; Post-CGSCC simplification +; CHECK-O2-NEXT: FunctionPass Manager +; CHECK-O2: Simplify the CFG +; CHECK-O2: Combine redundant instructions +; CHECK-O2: Early CSE ; CHECK-O2-NEXT: Eliminate Available Externally ; Inferring function attribute should be right after the CGSCC pipeline, before ; any other optimizations/analyses.