diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
@@ -80,6 +80,42 @@
 extern cl::opt<bool> EnableLoopInterleaving;
 extern cl::opt<bool> EnableLoopVectorization;
 
+/// A marker to determine if extra passes after loop vectorization should be
+/// run.
+struct ShouldRunExtraVectorPasses
+    : public AnalysisInfoMixin<ShouldRunExtraVectorPasses> {
+  static AnalysisKey Key;
+  struct Result {
+    bool invalidate(Function &F, const PreservedAnalyses &PA,
+                    FunctionAnalysisManager::Invalidator &) {
+      // Check whether the analysis has been explicitly invalidated. Otherwise,
+      // it remains preserved.
+      auto PAC = PA.getChecker<ShouldRunExtraVectorPasses>();
+      return !PAC.preservedWhenStateless();
+    }
+  };
+
+  /// The result carries no data; only whether it is cached is of interest.
+  Result run(Function &F, FunctionAnalysisManager &FAM) { return Result(); }
+};
+
+/// A pass manager to run a set of extra function simplification passes after
+/// vectorization, if requested. LoopVectorize caches the
+/// ShouldRunExtraVectorPasses analysis to request extra simplifications, if
+/// they could be beneficial.
+struct ExtraVectorPassManager : public FunctionPassManager {
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
+    auto PA = PreservedAnalyses::all();
+    // Only run the nested passes if the marker analysis has been cached.
+    if (AM.getCachedResult<ShouldRunExtraVectorPasses>(F))
+      PA.intersect(FunctionPassManager::run(F, AM));
+    // Abandon the marker so that it is never reported as preserved, even if
+    // the nested passes preserved everything else.
+    PA.abandon<ShouldRunExtraVectorPasses>();
+    return PA;
+  }
+};
+
 struct LoopVectorizeOptions {
   /// If false, consider all loops for interleaving.
   /// If true, only loops that explicitly request interleaving are considered.
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -995,26 +995,28 @@
   FPM.addPass(InstCombinePass());
 
   if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
+    ExtraVectorPassManager ExtraPasses;
     // At higher optimization levels, try to clean up any runtime overlap and
     // alignment checks inserted by the vectorizer. We want to track correlated
     // runtime checks for two inner loops in the same outer loop, fold any
     // common computations, hoist loop-invariant aspects out of any outer loop,
     // and unswitch the runtime checks if possible. Once hoisted, we may have
     // dead (or speculatable) control flows or more combining opportunities.
-    FPM.addPass(EarlyCSEPass());
-    FPM.addPass(CorrelatedValuePropagationPass());
-    FPM.addPass(InstCombinePass());
+    ExtraPasses.addPass(EarlyCSEPass());
+    ExtraPasses.addPass(CorrelatedValuePropagationPass());
+    ExtraPasses.addPass(InstCombinePass());
     LoopPassManager LPM;
     LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
     LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
                                        OptimizationLevel::O3));
-    FPM.addPass(
+    ExtraPasses.addPass(
         RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
-    FPM.addPass(
+    ExtraPasses.addPass(
         createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
                                         /*UseBlockFrequencyInfo=*/true));
-    FPM.addPass(SimplifyCFGPass());
-    FPM.addPass(InstCombinePass());
+    ExtraPasses.addPass(SimplifyCFGPass());
+    ExtraPasses.addPass(InstCombinePass());
+    FPM.addPass(std::move(ExtraPasses));
   }
 
   // Now that we've formed fast to execute loop structures, we do further
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -203,6 +203,7 @@
 FUNCTION_ANALYSIS("opt-remark-emit", OptimizationRemarkEmitterAnalysis())
 FUNCTION_ANALYSIS("scalar-evolution", ScalarEvolutionAnalysis())
 FUNCTION_ANALYSIS("should-not-run-function-passes", ShouldNotRunFunctionPassesAnalysis())
+FUNCTION_ANALYSIS("should-run-extra-vector-passes", ShouldRunExtraVectorPasses())
 FUNCTION_ANALYSIS("stack-safety-local", StackSafetyAnalysis())
 FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
 FUNCTION_ANALYSIS("targetir",
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -428,6 +428,8 @@
 
 namespace llvm {
 
+AnalysisKey ShouldRunExtraVectorPasses::Key;
+
 /// InnerLoopVectorizer vectorizes loops which contain only one basic
 /// block to a specified vectorization factor (VF).
 /// This class performs the widening of scalars into vectors, or multiple
@@ -10746,8 +10748,17 @@
       PA.preserve<LoopAnalysis>();
       PA.preserve<DominatorTreeAnalysis>();
     }
-    if (!Result.MadeCFGChange)
+
+    if (Result.MadeCFGChange) {
+      // Making CFG changes likely means a loop got vectorized. Indicate that
+      // extra simplification passes should be run.
+      // TODO: MadeCFGChange is not a perfect proxy. Extra passes should only
+      // be run if runtime checks have been added.
+      AM.getResult<ShouldRunExtraVectorPasses>(F);
+      PA.preserve<ShouldRunExtraVectorPasses>();
+    } else {
       PA.preserveSet<CFGAnalyses>();
+    }
     return PA;
 }
 
diff --git a/llvm/test/Other/opt-pipeline-vector-passes.ll b/llvm/test/Other/opt-pipeline-vector-passes.ll
--- a/llvm/test/Other/opt-pipeline-vector-passes.ll
+++ b/llvm/test/Other/opt-pipeline-vector-passes.ll
@@ -2,6 +2,9 @@
 ; RUN: opt -disable-verify -debug-pass-manager -passes='default<O2>' -force-vector-width=4 -S %s 2>&1 | FileCheck %s --check-prefixes=O2
 ; RUN: opt -disable-verify -debug-pass-manager -passes='default<O2>' -force-vector-width=4 -extra-vectorizer-passes -S %s 2>&1 | FileCheck %s --check-prefixes=O2_EXTRA
 
+; When the loop doesn't get vectorized, no extra vector passes should run.
+; RUN: opt -disable-verify -debug-pass-manager -passes='default<O2>' -force-vector-width=0 -extra-vectorizer-passes -S %s 2>&1 | FileCheck %s --check-prefixes=O2
+
 ; REQUIRES: asserts
 
 ; The loop vectorizer still runs at both -O1/-O2 even with the