Raise the default full-unroll threshold at -O3.

The optimization level is threaded through the legacy factories
(createLoopUnrollPass, createSimpleLoopUnrollPass), the legacy LoopUnroll
pass, and the new-pass-manager LoopUnrollPass, down to
gatherUnrollingPreferences(), which now defaults UP.Threshold to 300 when
OptLevel > 2 and to 150 otherwise.

Every factory defaults OptLevel to 2, so callers that do not pass a level keep
the previous threshold of 150.

NOTE(review): OptLevel is inserted as the FIRST parameter of
createLoopUnrollPass. A caller that passes Threshold positionally will now have
that value interpreted as OptLevel without any compile error; audit such
callers before landing.

Index: include/llvm/Transforms/Scalar.h
===================================================================
--- include/llvm/Transforms/Scalar.h
+++ include/llvm/Transforms/Scalar.h
@@ -181,11 +181,11 @@
 //
 // LoopUnroll - This pass is a simple loop unrolling pass.
 //
-Pass *createLoopUnrollPass(int Threshold = -1, int Count = -1,
+Pass *createLoopUnrollPass(int OptLevel = 2, int Threshold = -1, int Count = -1,
                            int AllowPartial = -1, int Runtime = -1,
                            int UpperBound = -1);
 // Create an unrolling pass for full unrolling that uses exact trip count only.
-Pass *createSimpleLoopUnrollPass();
+Pass *createSimpleLoopUnrollPass(int OptLevel = 2);
 
 //===----------------------------------------------------------------------===//
 //
Index: include/llvm/Transforms/Scalar/LoopUnrollPass.h
===================================================================
--- include/llvm/Transforms/Scalar/LoopUnrollPass.h
+++ include/llvm/Transforms/Scalar/LoopUnrollPass.h
@@ -18,9 +18,10 @@
 
 class LoopUnrollPass : public PassInfoMixin<LoopUnrollPass> {
   const bool AllowPartialUnrolling;
+  const int OptLevel;
 
-  explicit LoopUnrollPass(bool AllowPartialUnrolling)
-      : AllowPartialUnrolling(AllowPartialUnrolling) {}
+  explicit LoopUnrollPass(bool AllowPartialUnrolling, int OptLevel)
+      : AllowPartialUnrolling(AllowPartialUnrolling), OptLevel(OptLevel) {}
 
 public:
   /// Create an instance of the loop unroll pass that will support both full
@@ -28,16 +29,16 @@
   ///
   /// This uses the target information (or flags) to control the thresholds for
   /// different unrolling stategies but supports all of them.
-  static LoopUnrollPass create() {
-    return LoopUnrollPass(/*AllowPartialUnrolling*/ true);
+  static LoopUnrollPass create(int OptLevel = 2) {
+    return LoopUnrollPass(/*AllowPartialUnrolling*/ true, OptLevel);
   }
 
   /// Create an instance of the loop unroll pass that only does full loop
   /// unrolling.
   ///
   /// This will disable any runtime or partial unrolling.
-  static LoopUnrollPass createFull() {
-    return LoopUnrollPass(/*AllowPartialUnrolling*/ false);
+  static LoopUnrollPass createFull(int OptLevel = 2) {
+    return LoopUnrollPass(/*AllowPartialUnrolling*/ false, OptLevel);
   }
 
   PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
Index: lib/Passes/PassBuilder.cpp
===================================================================
--- lib/Passes/PassBuilder.cpp
+++ lib/Passes/PassBuilder.cpp
@@ -334,7 +334,7 @@
   LPM2.addPass(IndVarSimplifyPass());
   LPM2.addPass(LoopIdiomRecognizePass());
   LPM2.addPass(LoopDeletionPass());
-  LPM2.addPass(LoopUnrollPass::createFull());
+  LPM2.addPass(LoopUnrollPass::createFull(Level));
 
   // We provide the opt remark emitter pass for LICM to use. We only need to do
   // this once as it is immutable.
@@ -605,7 +605,7 @@
   // FIXME: It would be really good to use a loop-integrated instruction
   // combiner for cleanup here so that the unrolling and LICM can be pipelined
   // across the loop nests.
-  OptimizePM.addPass(createFunctionToLoopPassAdaptor(LoopUnrollPass::create()));
+  OptimizePM.addPass(createFunctionToLoopPassAdaptor(LoopUnrollPass::create(Level)));
   OptimizePM.addPass(InstCombinePass());
   OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
   OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
Index: lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -320,7 +320,7 @@
     MPM.add(createCFGSimplificationPass());
   }
   if (!DisableUnrollLoops)
-    MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops
+    MPM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops
   addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
 
   if (OptLevel > 1) {
@@ -366,7 +366,7 @@
 
     // BBVectorize may have significantly shortened a loop body; unroll again.
     if (!DisableUnrollLoops)
-      MPM.add(createLoopUnrollPass());
+      MPM.add(createLoopUnrollPass(OptLevel));
   }
 }
 
@@ -612,7 +612,7 @@
 
       // BBVectorize may have significantly shortened a loop body; unroll again.
       if (!DisableUnrollLoops)
-        MPM.add(createLoopUnrollPass());
+        MPM.add(createLoopUnrollPass(OptLevel));
     }
   }
 
@@ -621,7 +621,7 @@
   addInstructionCombiningPass(MPM);
 
   if (!DisableUnrollLoops) {
-    MPM.add(createLoopUnrollPass()); // Unroll small loops
+    MPM.add(createLoopUnrollPass(OptLevel)); // Unroll small loops
 
     // LoopUnroll may generate some redundency to cleanup.
     addInstructionCombiningPass(MPM);
@@ -772,11 +772,11 @@
     PM.add(createLoopInterchangePass());
 
   if (!DisableUnrollLoops)
-    PM.add(createSimpleLoopUnrollPass()); // Unroll small loops
+    PM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops
   PM.add(createLoopVectorizePass(true, LoopVectorize));
   // The vectorizer may have significantly shortened a loop body; unroll again.
   if (!DisableUnrollLoops)
-    PM.add(createLoopUnrollPass());
+    PM.add(createLoopUnrollPass(OptLevel));
 
   // Now that we've optimized loops (in particular loop induction variables),
   // we may have exposed more scalar opportunities. Run parts of the scalar
Index: lib/Transforms/Scalar/LoopUnrollPass.cpp
===================================================================
--- lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -131,13 +131,14 @@
 /// Gather the various unrolling parameters based on the defaults, compiler
 /// flags, TTI overrides and user specified parameters.
 static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
-    Loop *L, const TargetTransformInfo &TTI, Optional<unsigned> UserThreshold,
-    Optional<unsigned> UserCount, Optional<bool> UserAllowPartial,
-    Optional<bool> UserRuntime, Optional<bool> UserUpperBound) {
+    Loop *L, const TargetTransformInfo &TTI, int OptLevel,
+    Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
+    Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
+    Optional<bool> UserUpperBound) {
   TargetTransformInfo::UnrollingPreferences UP;
 
   // Set up the defaults
-  UP.Threshold = 150;
+  UP.Threshold = OptLevel > 2 ? 300 : 150;
   UP.MaxPercentThresholdBoost = 400;
   UP.OptSizeThreshold = 0;
   UP.PartialThreshold = 150;
@@ -927,7 +928,7 @@
 static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
                             ScalarEvolution *SE, const TargetTransformInfo &TTI,
                             AssumptionCache &AC, OptimizationRemarkEmitter &ORE,
-                            bool PreserveLCSSA,
+                            bool PreserveLCSSA, int OptLevel,
                             Optional<unsigned> ProvidedCount,
                             Optional<unsigned> ProvidedThreshold,
                             Optional<bool> ProvidedAllowPartial,
@@ -947,7 +948,7 @@
   bool NotDuplicatable;
   bool Convergent;
   TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
-      L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
+      L, TTI, OptLevel, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
       ProvidedRuntime, ProvidedUpperBound);
   // Exit early if unrolling is disabled.
   if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
@@ -1047,16 +1048,17 @@
 class LoopUnroll : public LoopPass {
 public:
   static char ID; // Pass ID, replacement for typeid
-  LoopUnroll(Optional<unsigned> Threshold = None,
+  LoopUnroll(int OptLevel = 2, Optional<unsigned> Threshold = None,
              Optional<unsigned> Count = None,
              Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
              Optional<bool> UpperBound = None)
-      : LoopPass(ID), ProvidedCount(std::move(Count)),
+      : LoopPass(ID), OptLevel(OptLevel), ProvidedCount(std::move(Count)),
         ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
         ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound) {
     initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
   }
 
+  int OptLevel;
   Optional<unsigned> ProvidedCount;
   Optional<unsigned> ProvidedThreshold;
   Optional<bool> ProvidedAllowPartial;
@@ -1081,7 +1083,7 @@
     OptimizationRemarkEmitter ORE(&F);
     bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
 
-    return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA,
+    return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel,
                            ProvidedCount, ProvidedThreshold,
                            ProvidedAllowPartial, ProvidedRuntime,
                            ProvidedUpperBound);
@@ -1107,21 +1109,22 @@
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
 
-Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
-                                 int Runtime, int UpperBound) {
+Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count,
+                                 int AllowPartial, int Runtime,
+                                 int UpperBound) {
   // TODO: It would make more sense for this function to take the optionals
   // directly, but that's dangerous since it would silently break out of tree
   // callers.
-  return new LoopUnroll(Threshold == -1 ? None : Optional<unsigned>(Threshold),
-                        Count == -1 ? None : Optional<unsigned>(Count),
-                        AllowPartial == -1 ? None
-                                           : Optional<bool>(AllowPartial),
-                        Runtime == -1 ? None : Optional<bool>(Runtime),
-                        UpperBound == -1 ? None : Optional<bool>(UpperBound));
+  return new LoopUnroll(
+      OptLevel, Threshold == -1 ? None : Optional<unsigned>(Threshold),
+      Count == -1 ? None : Optional<unsigned>(Count),
+      AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
+      Runtime == -1 ? None : Optional<bool>(Runtime),
+      UpperBound == -1 ? None : Optional<bool>(UpperBound));
 }
 
-Pass *llvm::createSimpleLoopUnrollPass() {
-  return llvm::createLoopUnrollPass(-1, -1, 0, 0, 0);
+Pass *llvm::createSimpleLoopUnrollPass(int OptLevel) {
+  return llvm::createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0);
 }
 
 PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
@@ -1153,10 +1156,10 @@
   Optional<bool> AllowPartialParam, RuntimeParam, UpperBoundParam;
   if (!AllowPartialUnrolling)
     AllowPartialParam = RuntimeParam = UpperBoundParam = false;
-  bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, &AR.SE, AR.TTI, AR.AC, *ORE,
-                                 /*PreserveLCSSA*/ true, /*Count*/ None,
-                                 /*Threshold*/ None, AllowPartialParam,
-                                 RuntimeParam, UpperBoundParam);
+  bool Changed = tryToUnrollLoop(
+      &L, AR.DT, &AR.LI, &AR.SE, AR.TTI, AR.AC, *ORE,
+      /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
+      /*Threshold*/ None, AllowPartialParam, RuntimeParam, UpperBoundParam);
   if (!Changed)
     return PreservedAnalyses::all();
 
Index: test/Transforms/LoopVectorize/X86/metadata-enable.ll
===================================================================
--- test/Transforms/LoopVectorize/X86/metadata-enable.ll
+++ test/Transforms/LoopVectorize/X86/metadata-enable.ll
@@ -1,13 +1,14 @@
 ; RUN: opt < %s -mcpu=corei7 -O1 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1
 ; RUN: opt < %s -mcpu=corei7 -O2 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O2
-; RUN: opt < %s -mcpu=corei7 -O3 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3
+; RUN: opt < %s -mcpu=corei7 -O3 -S -unroll-threshold=150 -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3
+; RUN: opt < %s -mcpu=corei7 -O3 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DEFAULT
 ; RUN: opt < %s -mcpu=corei7 -Os -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Os
 ; RUN: opt < %s -mcpu=corei7 -Oz -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Oz
 ; RUN: opt < %s -mcpu=corei7 -O1 -vectorize-loops -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1VEC
 ; RUN: opt < %s -mcpu=corei7 -Oz -vectorize-loops -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC
 ; RUN: opt < %s -mcpu=corei7 -O1 -loop-vectorize -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1VEC2
 ; RUN: opt < %s -mcpu=corei7 -Oz -loop-vectorize -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC2
-; RUN: opt < %s -mcpu=corei7 -O3 -disable-loop-vectorization -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DIS
+; RUN: opt < %s -mcpu=corei7 -O3 -unroll-threshold=150 -disable-loop-vectorization -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DIS
 
 ; This file tests the llvm.loop.vectorize.enable metadata forcing
 ; vectorization even when optimization levels are too low, or when
@@ -25,6 +26,9 @@
 ; O3-LABEL: @enabled(
 ; O3: store <4 x i32>
 ; O3: ret i32
+; O3DEFAULT-LABEL: @enabled(
+; O3DEFAULT: store <4 x i32>
+; O3DEFAULT: ret i32
 ; Pragma always wins!
 ; O3DIS-LABEL: @enabled(
 ; O3DIS: store <4 x i32>
@@ -77,6 +81,9 @@
 ; O3-LABEL: @nopragma(
 ; O3: store <4 x i32>
 ; O3: ret i32
+; O3DEFAULT-LABEL: @nopragma(
+; O3DEFAULT: store <4 x i32>
+; O3DEFAULT: ret i32
 ; O3DIS-LABEL: @nopragma(
 ; O3DIS-NOT: store <4 x i32>
 ; O3DIS: ret i32
@@ -128,6 +135,9 @@
 ; O3-LABEL: @disabled(
 ; O3-NOT: store <4 x i32>
 ; O3: ret i32
+; O3DEFAULT-LABEL: @disabled(
+; O3DEFAULT: store <4 x i32>
+; O3DEFAULT: ret i32
 ; O3DIS-LABEL: @disabled(
 ; O3DIS-NOT: store <4 x i32>
 ; O3DIS: ret i32