diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -287,6 +287,8 @@
   MergeFunctions = false;
   UniqueLinkageNames = false;
 }
+// Defined in PassManagerBuilder.cpp (-extra-vectorizer-passes).
+extern cl::opt<bool> ExtraVectorizerPasses;
 
 extern cl::opt<bool> EnableConstraintElimination;
 extern cl::opt<bool> EnableGVNHoist;
@@ -1255,6 +1257,29 @@
   // Cleanup after the loop optimization passes.
   OptimizePM.addPass(InstCombinePass());
 
+  if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
+    // At higher optimization levels, try to clean up any runtime overlap and
+    // alignment checks inserted by the vectorizer. We want to track correlated
+    // runtime checks for two inner loops in the same outer loop, fold any
+    // common computations, hoist loop-invariant aspects out of any outer loop,
+    // and unswitch the runtime checks if possible. Once hoisted, we may have
+    // dead (or speculatable) control flows or more combining opportunities.
+    OptimizePM.addPass(EarlyCSEPass());
+    OptimizePM.addPass(CorrelatedValuePropagationPass());
+    OptimizePM.addPass(InstCombinePass());
+    LoopPassManager LPM(DebugLogging);
+    LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+    LPM.addPass(
+        SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
+    OptimizePM.addPass(
+        RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+    OptimizePM.addPass(createFunctionToLoopPassAdaptor(
+        std::move(LPM), EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true,
+        DebugLogging));
+    OptimizePM.addPass(SimplifyCFGPass());
+    OptimizePM.addPass(InstCombinePass());
+  }
+
   // Now that we've formed fast to execute loop structures, we do further
   // optimizations. These are run afterward as they might block doing complex
   // analyses and transforms such as what are needed for loop vectorization.
@@ -1274,8 +1299,13 @@
                                       .sinkCommonInsts(true)));
 
   // Optimize parallel scalar instruction chains into SIMD instructions.
-  if (PTO.SLPVectorization)
+  if (PTO.SLPVectorization) {
     OptimizePM.addPass(SLPVectorizerPass());
+    // Extra cleanup: run EarlyCSE right after SLP vectorization.
+    if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
+      OptimizePM.addPass(EarlyCSEPass());
+    }
+  }
 
   // Enhance/cleanup vector code.
   OptimizePM.addPass(VectorCombinePass());
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -60,7 +60,9 @@
   cl::init(false), cl::Hidden,
   cl::desc("Run GVN instead of Early CSE after vectorization passes"));
 
-static cl::opt<bool> ExtraVectorizerPasses(
+// Not static: PassBuilder.cpp declares this option extern so the new pass
+// manager pipeline can honor it too.
+cl::opt<bool> ExtraVectorizerPasses(
     "extra-vectorizer-passes", cl::init(false), cl::Hidden,
     cl::desc("Run cleanup optimization passes after vectorization."));
 
diff --git a/llvm/test/Other/opt-pipeline-vector-passes.ll b/llvm/test/Other/opt-pipeline-vector-passes.ll
--- a/llvm/test/Other/opt-pipeline-vector-passes.ll
+++ b/llvm/test/Other/opt-pipeline-vector-passes.ll
@@ -5,6 +5,7 @@
 ; RUN: opt -enable-new-pm=0 -O2 -vectorize-loops=0 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=OLDPM_O2_FORCE_OFF
 ; RUN: opt -disable-verify -debug-pass-manager -passes='default<O1>' -S %s 2>&1 | FileCheck %s --check-prefixes=NEWPM_O1
 ; RUN: opt -disable-verify -debug-pass-manager -passes='default<O2>' -S %s 2>&1 | FileCheck %s --check-prefixes=NEWPM_O2
+; RUN: opt -disable-verify -debug-pass-manager -passes='default<O2>' -extra-vectorizer-passes -S %s 2>&1 | FileCheck %s --check-prefixes=NEWPM_O2_EXTRA
 
 ; REQUIRES: asserts
 
@@ -64,6 +65,28 @@
 ; NEWPM_O2: Running pass: SLPVectorizerPass
 ; NEWPM_O2: Running pass: VectorCombinePass
 
-define void @f() {
-  ret void
+; The extra-vectorizer-passes flag adds cleanup after the loop and SLP vectorizers.
+; NEWPM_O2_EXTRA-LABEL: Running pass: LoopVectorizePass
+; NEWPM_O2_EXTRA: Running pass: EarlyCSEPass
+; NEWPM_O2_EXTRA: Running pass: CorrelatedValuePropagationPass
+; NEWPM_O2_EXTRA: Running pass: InstCombinePass
+; NEWPM_O2_EXTRA: Running pass: LICMPass
+; NEWPM_O2_EXTRA: Running pass: SimpleLoopUnswitchPass
+; NEWPM_O2_EXTRA: Running pass: SimplifyCFGPass
+; NEWPM_O2_EXTRA: Running pass: InstCombinePass
+; NEWPM_O2_EXTRA: Running pass: SLPVectorizerPass
+; NEWPM_O2_EXTRA: Running pass: EarlyCSEPass
+; NEWPM_O2_EXTRA: Running pass: VectorCombinePass
+
+define i64 @f(i1 %cond) {
+entry:
+  br label %loop
+
+loop:
+  %i = phi i64 [ 0, %entry ], [ %inc, %loop ]
+  %inc = add i64 %i, 1
+  br i1 %cond, label %loop, label %exit
+
+exit:
+  ret i64 %i
 }