Index: include/llvm/Transforms/Scalar/LoopUnrollPass.h =================================================================== --- include/llvm/Transforms/Scalar/LoopUnrollPass.h +++ include/llvm/Transforms/Scalar/LoopUnrollPass.h @@ -16,33 +16,29 @@ namespace llvm { -class LoopUnrollPass : public PassInfoMixin { - const bool AllowPartialUnrolling; +/// Loop unroll pass that only does full loop unrolling. +class LoopFullUnrollPass : public PassInfoMixin { const int OptLevel; - explicit LoopUnrollPass(bool AllowPartialUnrolling, int OptLevel) - : AllowPartialUnrolling(AllowPartialUnrolling), OptLevel(OptLevel) {} +public: + explicit LoopFullUnrollPass(int OptLevel = 2) : OptLevel(OptLevel) {} + + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); +}; + +/// Loop unroll pass that will support both full and partial unrolling. +/// It is a function pass to have access to function and module analyses. +/// It will also put loops into canonical form (simplified and LCSSA). +class LoopUnrollPass : public PassInfoMixin { + const int OptLevel; public: - /// Create an instance of the loop unroll pass that will support both full - /// and partial unrolling. - /// /// This uses the target information (or flags) to control the thresholds for /// different unrolling stategies but supports all of them. - static LoopUnrollPass create(int OptLevel = 2) { - return LoopUnrollPass(/*AllowPartialUnrolling*/ true, OptLevel); - } - - /// Create an instance of the loop unroll pass that only does full loop - /// unrolling. - /// - /// This will disable any runtime or partial unrolling. 
- static LoopUnrollPass createFull(int OptLevel = 2) { - return LoopUnrollPass(/*AllowPartialUnrolling*/ false, OptLevel); - } + explicit LoopUnrollPass(int OptLevel = 2) : OptLevel(OptLevel) {} - PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, - LoopStandardAnalysisResults &AR, LPMUpdater &U); + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; } // end namespace llvm Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -394,7 +394,7 @@ // inaccurate. if (Phase != ThinLTOPhase::PreLink || !PGOOpt || PGOOpt->SampleProfileFile.empty()) - LPM2.addPass(LoopUnrollPass::createFull(Level)); + LPM2.addPass(LoopFullUnrollPass(Level)); for (auto &C : LoopOptimizerEndEPCallbacks) C(LPM2, Level); @@ -723,7 +723,7 @@ // FIXME: It would be really good to use a loop-integrated instruction // combiner for cleanup here so that the unrolling and LICM can be pipelined // across the loop nests. 
- OptimizePM.addPass(createFunctionToLoopPassAdaptor(LoopUnrollPass::create(Level))); + OptimizePM.addPass(LoopUnrollPass(Level)); OptimizePM.addPass(InstCombinePass()); OptimizePM.addPass(RequireAnalysisPass()); OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass())); Index: lib/Passes/PassRegistry.def =================================================================== --- lib/Passes/PassRegistry.def +++ lib/Passes/PassRegistry.def @@ -196,6 +196,7 @@ FUNCTION_PASS("sroa", SROA()) FUNCTION_PASS("tailcallelim", TailCallElimPass()) FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass()) +FUNCTION_PASS("unroll", LoopUnrollPass()) FUNCTION_PASS("verify", VerifierPass()) FUNCTION_PASS("verify", DominatorTreeVerifierPass()) FUNCTION_PASS("verify", LoopVerifierPass()) @@ -227,8 +228,7 @@ LOOP_PASS("simplify-cfg", LoopSimplifyCFGPass()) LOOP_PASS("strength-reduce", LoopStrengthReducePass()) LOOP_PASS("indvars", IndVarSimplifyPass()) -LOOP_PASS("unroll", LoopUnrollPass::create()) -LOOP_PASS("unroll-full", LoopUnrollPass::createFull()) +LOOP_PASS("unroll-full", LoopFullUnrollPass()) LOOP_PASS("unswitch", SimpleLoopUnswitchPass()) LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs())) LOOP_PASS("print", IVUsersPrinterPass(dbgs())) Index: lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnrollPass.cpp +++ lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -1129,9 +1129,9 @@ return llvm::createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0); } -PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM, - LoopStandardAnalysisResults &AR, - LPMUpdater &Updater) { +PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &Updater) { const auto &FAM = AM.getResult(L, AR).getManager(); Function *F = L.getHeader()->getParent(); @@ -1139,8 +1139,9 @@ auto *ORE = FAM.getCachedResult(*F); // FIXME: This 
should probably be optional rather than required. if (!ORE) - report_fatal_error("LoopUnrollPass: OptimizationRemarkEmitterAnalysis not " - "cached at a higher level"); + report_fatal_error( + "LoopFullUnrollPass: OptimizationRemarkEmitterAnalysis not " + "cached at a higher level"); // Keep track of the previous loop structure so we can identify new loops // created by unrolling. @@ -1151,17 +1152,11 @@ else OldLoops.insert(AR.LI.begin(), AR.LI.end()); - // The API here is quite complex to call, but there are only two interesting - // states we support: partial and full (or "simple") unrolling. However, to - // enable these things we actually pass "None" in for the optional to avoid - // providing an explicit choice. - Optional AllowPartialParam, RuntimeParam, UpperBoundParam; - if (!AllowPartialUnrolling) - AllowPartialParam = RuntimeParam = UpperBoundParam = false; - bool Changed = tryToUnrollLoop( - &L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE, - /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None, - /*Threshold*/ None, AllowPartialParam, RuntimeParam, UpperBoundParam); + bool Changed = + tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE, + /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None, + /*Threshold*/ None, /*AllowPartial*/ false, + /*Runtime*/ false, /*UpperBound*/ false); if (!Changed) return PreservedAnalyses::all(); @@ -1172,17 +1167,13 @@ #endif // Unrolling can do several things to introduce new loops into a loop nest: - // - Partial unrolling clones child loops within the current loop. If it - // uses a remainder, then it can also create any number of sibling loops. // - Full unrolling clones child loops within the current loop but then // removes the current loop making all of the children appear to be new // sibling loops. - // - Loop peeling can directly introduce new sibling loops by peeling one - // iteration. 
// - // When a new loop appears as a sibling loop, either from peeling an - // iteration or fully unrolling, its nesting structure has fundamentally - // changed and we want to revisit it to reflect that. + // When a new loop appears as a sibling loop after fully unrolling, + // its nesting structure has fundamentally changed and we want to revisit + // it to reflect that. // // When unrolling has removed the current loop, we need to tell the // infrastructure that it is gone. @@ -1213,9 +1204,7 @@ } else { // We can only walk child loops if the current loop remained valid. if (UnrollRevisitChildLoops) { - // Walk *all* of the child loops. This is a highly speculative mode - // anyways so look for any simplifications that arose from partial - // unrolling or peeling off of iterations. + // Walk *all* of the child loops. SmallVector ChildLoops(L.begin(), L.end()); Updater.addChildLoops(ChildLoops); } @@ -1223,3 +1212,82 @@ return getLoopPassPreservedAnalyses(); } + +template +static SmallVector appendLoopsToWorklist(RangeT &&Loops) { + SmallVector Worklist; + // We use an internal worklist to build up the preorder traversal without + // recursion. 
+ SmallVector PreOrderLoops, PreOrderWorklist; + + for (Loop *RootL : Loops) { + assert(PreOrderLoops.empty() && "Must start with an empty preorder walk."); + assert(PreOrderWorklist.empty() && + "Must start with an empty preorder walk worklist."); + PreOrderWorklist.push_back(RootL); + do { + Loop *L = PreOrderWorklist.pop_back_val(); + PreOrderWorklist.append(L->begin(), L->end()); + PreOrderLoops.push_back(L); + } while (!PreOrderWorklist.empty()); + + Worklist.append(PreOrderLoops.begin(), PreOrderLoops.end()); + PreOrderLoops.clear(); + } + return Worklist; +} + +PreservedAnalyses LoopUnrollPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &SE = AM.getResult(F); + auto &LI = AM.getResult(F); + auto &TTI = AM.getResult(F); + auto &DT = AM.getResult(F); + auto &AC = AM.getResult(F); + auto &ORE = AM.getResult(F); + + bool Changed = false; + + // The unroller requires loops to be in simplified form, and also needs LCSSA. + // Since simplification may add new inner loops, it has to run before the + // legality and profitability checks. This means running the loop unroller + // will simplify all loops, regardless of whether anything ends up being + // unrolled. + for (auto &L : LI) { + Changed |= simplifyLoop(L, &DT, &LI, &SE, &AC, false /* PreserveLCSSA */); + Changed |= formLCSSARecursively(*L, DT, &LI, &SE); + } + + SmallVector Worklist = appendLoopsToWorklist(LI); + + while (!Worklist.empty()) { + // Because the LoopInfo stores the loops in RPO, we walk the worklist + // from back to front so that we work forward across the CFG, which + // for unrolling is only needed to get optimization remarks emitted in + // a forward order. + Loop &L = *Worklist.pop_back_val(); + Loop *ParentL = L.getParentLoop(); + + // The API here is quite complex to call, but there are only two interesting + // states we support: partial and full (or "simple") unrolling. 
However, to + // enable these things we actually pass "None" in for the optional to avoid + // providing an explicit choice. + Optional AllowPartialParam, RuntimeParam, UpperBoundParam; + bool CurChanged = tryToUnrollLoop( + &L, DT, &LI, SE, TTI, AC, ORE, + /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None, + /*Threshold*/ None, AllowPartialParam, RuntimeParam, UpperBoundParam); + Changed |= CurChanged; + + // The parent must not be damaged by unrolling! +#ifndef NDEBUG + if (CurChanged && ParentL) + ParentL->verifyLoop(); +#endif + } + + if (!Changed) + return PreservedAnalyses::all(); + + return getLoopPassPreservedAnalyses(); +} Index: test/Other/new-pm-defaults.ll =================================================================== --- test/Other/new-pm-defaults.ll +++ test/Other/new-pm-defaults.ll @@ -140,7 +140,7 @@ ; CHECK-O-NEXT: Running pass: LoopIdiomRecognizePass ; CHECK-EP-LOOP-LATE-NEXT: Running pass: NoOpLoopPass ; CHECK-O-NEXT: Running pass: LoopDeletionPass -; CHECK-O-NEXT: Running pass: LoopUnrollPass +; CHECK-O-NEXT: Running pass: LoopFullUnrollPass ; CHECK-EP-LOOP-END-NEXT: Running pass: NoOpLoopPass ; CHECK-O-NEXT: Finished Loop pass manager run. 
; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass @@ -197,7 +197,7 @@ ; CHECK-O-NEXT: Running pass: SLPVectorizerPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass -; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopUnrollPass +; CHECK-O-NEXT: Running pass: LoopUnrollPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis ; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass Index: test/Other/new-pm-thinlto-defaults.ll =================================================================== --- test/Other/new-pm-thinlto-defaults.ll +++ test/Other/new-pm-thinlto-defaults.ll @@ -132,7 +132,7 @@ ; CHECK-O-NEXT: Running pass: IndVarSimplifyPass ; CHECK-O-NEXT: Running pass: LoopIdiomRecognizePass ; CHECK-O-NEXT: Running pass: LoopDeletionPass -; CHECK-O-NEXT: Running pass: LoopUnrollPass +; CHECK-O-NEXT: Running pass: LoopFullUnrollPass ; CHECK-O-NEXT: Finished Loop pass manager run. ; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass ; CHECK-Os-NEXT: Running pass: GVN @@ -184,7 +184,7 @@ ; CHECK-POSTLINK-O-NEXT: Running pass: SLPVectorizerPass ; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass -; CHECK-POSTLINK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopUnrollPass +; CHECK-POSTLINK-O-NEXT: Running pass: LoopUnrollPass ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass ; CHECK-POSTLINK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis ; CHECK-POSTLINK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass Index: test/Transforms/Inline/last-call-bonus.ll =================================================================== --- test/Transforms/Inline/last-call-bonus.ll +++ test/Transforms/Inline/last-call-bonus.ll @@ -10,7 +10,7 @@ ; preprocess the test. 
; RUN: opt < %s -loop-unroll -inline -unroll-threshold=15000 -inline-threshold=250 -S | FileCheck %s -; RUN: opt < %s -passes='function(require,loop(unroll)),require,cgscc(inline)' -unroll-threshold=15000 -inline-threshold=250 -S | FileCheck %s +; RUN: opt < %s -passes='function(require,unroll),require,cgscc(inline)' -unroll-threshold=15000 -inline-threshold=250 -S | FileCheck %s ; CHECK-LABEL: define internal i32 @bar() define internal i32 @baz() { Index: test/Transforms/Inline/last-call-no-bonus.ll =================================================================== --- test/Transforms/Inline/last-call-no-bonus.ll +++ test/Transforms/Inline/last-call-no-bonus.ll @@ -2,7 +2,7 @@ ; to the internal functions are cold, thereby preventing the last call to ; static bonus from being applied. -; RUN: opt < %s -passes='function(require,loop(unroll)),require,cgscc(inline)' -unroll-threshold=15000 -inline-threshold=250 -S | FileCheck %s +; RUN: opt < %s -passes='function(require,unroll),require,cgscc(inline)' -unroll-threshold=15000 -inline-threshold=250 -S | FileCheck %s ; CHECK-LABEL: define internal i32 @baz define internal i32 @baz() { Index: test/Transforms/LoopUnroll/basic.ll =================================================================== --- test/Transforms/LoopUnroll/basic.ll +++ test/Transforms/LoopUnroll/basic.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -loop-unroll -S | FileCheck %s -; RUN: opt < %s -passes='require,loop(unroll)' -S | FileCheck %s +; RUN: opt < %s -passes='require,unroll' -S | FileCheck %s ; This should not unroll since the address of the loop header is taken. 
Index: test/Transforms/LoopUnroll/full-unroll-heuristics.ll =================================================================== --- test/Transforms/LoopUnroll/full-unroll-heuristics.ll +++ test/Transforms/LoopUnroll/full-unroll-heuristics.ll @@ -26,9 +26,9 @@ ; RUN: opt < %s -S -passes='require,loop(unroll-full)' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=20 -unroll-max-percent-threshold-boost=100 | FileCheck %s -check-prefix=TEST3 ; Check that these work when the unroller has partial unrolling enabled too. -; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-max-percent-threshold-boost=100 | FileCheck %s -check-prefix=TEST1 -; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=20 -unroll-max-percent-threshold-boost=200 | FileCheck %s -check-prefix=TEST2 -; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=20 -unroll-max-percent-threshold-boost=100 | FileCheck %s -check-prefix=TEST3 +; RUN: opt < %s -S -passes='require,unroll' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-max-percent-threshold-boost=100 | FileCheck %s -check-prefix=TEST1 +; RUN: opt < %s -S -passes='require,unroll' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=20 -unroll-max-percent-threshold-boost=200 | FileCheck %s -check-prefix=TEST2 +; RUN: opt < %s -S -passes='require,unroll' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=20 -unroll-max-percent-threshold-boost=100 | FileCheck %s -check-prefix=TEST3 ; If the absolute threshold is too low, we should not unroll: ; TEST1: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv Index: test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll =================================================================== --- 
test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll +++ test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -S -unroll-partial-threshold=20 -unroll-threshold=20 -loop-unroll -unroll-allow-partial -unroll-runtime -unroll-allow-remainder -unroll-max-percent-threshold-boost=100 | FileCheck %s -; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-partial-threshold=20 -unroll-threshold=20 -unroll-allow-partial -unroll-runtime -unroll-allow-remainder -unroll-max-percent-threshold-boost=100 | FileCheck %s +; RUN: opt < %s -S -passes='require,unroll' -unroll-partial-threshold=20 -unroll-threshold=20 -unroll-allow-partial -unroll-runtime -unroll-allow-remainder -unroll-max-percent-threshold-boost=100 | FileCheck %s ; ; Also check that the simple unroller doesn't allow the partial unrolling. ; RUN: opt < %s -S -passes='require,loop(unroll-full)' -unroll-partial-threshold=20 -unroll-threshold=20 -unroll-allow-partial -unroll-runtime -unroll-allow-remainder -unroll-max-percent-threshold-boost=100 | FileCheck %s --check-prefix=CHECK-NO-UNROLL Index: test/Transforms/LoopUnroll/revisit.ll =================================================================== --- test/Transforms/LoopUnroll/revisit.ll +++ test/Transforms/LoopUnroll/revisit.ll @@ -4,19 +4,19 @@ ; current two cases. ; ; RUN: opt < %s -disable-output -debug-pass-manager 2>&1 \ -; RUN: -passes='require,loop(unroll)' \ +; RUN: -passes='require,loop(unroll-full)' \ ; RUN: | FileCheck %s ; ; Also run in a special mode that visits children. ; RUN: opt < %s -disable-output -debug-pass-manager -unroll-revisit-child-loops 2>&1 \ -; RUN: -passes='require,loop(unroll)' \ +; RUN: -passes='require,loop(unroll-full)' \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-CHILDREN ; Basic test is fully unrolled and we revisit the post-unroll new sibling ; loops, including the ones that used to be child loops. 
define void @full_unroll(i1* %ptr) { ; CHECK-LABEL: FunctionToLoopPassAdaptor{{.*}} on full_unroll -; CHECK-NOT: LoopUnrollPass +; CHECK-NOT: LoopFullUnrollPass entry: br label %l0 @@ -39,8 +39,8 @@ l0.0.0: %cond.0.0.0 = load volatile i1, i1* %ptr br i1 %cond.0.0.0, label %l0.0.0, label %l0.0.1.ph -; CHECK: LoopUnrollPass on Loop at depth 3 containing: %l0.0.0
-; CHECK-NOT: LoopUnrollPass +; CHECK: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.0
+; CHECK-NOT: LoopFullUnrollPass l0.0.1.ph: br label %l0.0.1 @@ -48,30 +48,30 @@ l0.0.1: %cond.0.0.1 = load volatile i1, i1* %ptr br i1 %cond.0.0.1, label %l0.0.1, label %l0.0.latch -; CHECK: LoopUnrollPass on Loop at depth 3 containing: %l0.0.1
-; CHECK-NOT: LoopUnrollPass +; CHECK: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.1
+; CHECK-NOT: LoopFullUnrollPass l0.0.latch: %cmp = icmp slt i32 %iv.next, 2 br i1 %cmp, label %l0.0, label %l0.latch -; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0 -; CHECK-NOT: LoopUnrollPass +; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0 +; CHECK-NOT: LoopFullUnrollPass ; ; Unrolling occurs, so we visit what were the inner loops twice over. First we ; visit their clones, and then we visit the original loops re-parented. -; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.1.1
-; CHECK-NOT: LoopUnrollPass -; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.0.1
-; CHECK-NOT: LoopUnrollPass -; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.1
-; CHECK-NOT: LoopUnrollPass -; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.0
-; CHECK-NOT: LoopUnrollPass +; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.1.1
+; CHECK-NOT: LoopFullUnrollPass +; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.0.1
+; CHECK-NOT: LoopFullUnrollPass +; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.1
+; CHECK-NOT: LoopFullUnrollPass +; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.0
+; CHECK-NOT: LoopFullUnrollPass l0.latch: br label %l0 -; CHECK: LoopUnrollPass on Loop at depth 1 containing: %l0
-; CHECK-NOT: LoopUnrollPass +; CHECK: LoopFullUnrollPass on Loop at depth 1 containing: %l0
+; CHECK-NOT: LoopFullUnrollPass exit: ret void @@ -82,7 +82,7 @@ ; default visited, but will be visited with a special parameter. define void @partial_unroll(i32 %count, i1* %ptr) { ; CHECK-LABEL: FunctionToLoopPassAdaptor{{.*}} on partial_unroll -; CHECK-NOT: LoopUnrollPass +; CHECK-NOT: LoopFullUnrollPass entry: br label %l0 @@ -105,8 +105,8 @@ l0.0.0: %cond.0.0.0 = load volatile i1, i1* %ptr br i1 %cond.0.0.0, label %l0.0.0, label %l0.0.1.ph -; CHECK: LoopUnrollPass on Loop at depth 3 containing: %l0.0.0
-; CHECK-NOT: LoopUnrollPass +; CHECK: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.0
+; CHECK-NOT: LoopFullUnrollPass l0.0.1.ph: br label %l0.0.1 @@ -114,40 +114,40 @@ l0.0.1: %cond.0.0.1 = load volatile i1, i1* %ptr br i1 %cond.0.0.1, label %l0.0.1, label %l0.0.latch -; CHECK: LoopUnrollPass on Loop at depth 3 containing: %l0.0.1
-; CHECK-NOT: LoopUnrollPass +; CHECK: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.1
+; CHECK-NOT: LoopFullUnrollPass l0.0.latch: %cmp = icmp slt i32 %iv.next, %count br i1 %cmp, label %l0.0, label %l0.latch, !llvm.loop !1 -; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0 -; CHECK-NOT: LoopUnrollPass +; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0 +; CHECK-NOT: LoopFullUnrollPass ; ; Partial unrolling occurs which introduces both new child loops and new sibling ; loops. We only visit the child loops in a special mode, not by default. -; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 3 containing: %l0.0.0
-; CHECK-CHILDREN-NOT: LoopUnrollPass -; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 3 containing: %l0.0.1
-; CHECK-CHILDREN-NOT: LoopUnrollPass -; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 3 containing: %l0.0.0.1
-; CHECK-CHILDREN-NOT: LoopUnrollPass -; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 3 containing: %l0.0.1.1
-; CHECK-CHILDREN-NOT: LoopUnrollPass +; CHECK-CHILDREN: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.0
+; CHECK-CHILDREN-NOT: LoopFullUnrollPass +; CHECK-CHILDREN: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.1
+; CHECK-CHILDREN-NOT: LoopFullUnrollPass +; CHECK-CHILDREN: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.0.1
+; CHECK-CHILDREN-NOT: LoopFullUnrollPass +; CHECK-CHILDREN: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.1.1
+; CHECK-CHILDREN-NOT: LoopFullUnrollPass ; ; When we revisit children, we also revisit the current loop. -; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 2 containing: %l0.0
-; CHECK-CHILDREN-NOT: LoopUnrollPass +; CHECK-CHILDREN: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0
+; CHECK-CHILDREN-NOT: LoopFullUnrollPass ; ; Revisit the children of the outer loop that are part of the epilogue. ; -; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.0.epil
-; CHECK-NOT: LoopUnrollPass -; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.1.epil
-; CHECK-NOT: LoopUnrollPass +; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.0.epil
+; CHECK-NOT: LoopFullUnrollPass +; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.1.epil
+; CHECK-NOT: LoopFullUnrollPass l0.latch: br label %l0 -; CHECK: LoopUnrollPass on Loop at depth 1 containing: %l0
-; CHECK-NOT: LoopUnrollPass +; CHECK: LoopFullUnrollPass on Loop at depth 1 containing: %l0
+; CHECK-NOT: LoopFullUnrollPass exit: ret void Index: test/Transforms/LoopUnroll/runtime-loop.ll =================================================================== --- test/Transforms/LoopUnroll/runtime-loop.ll +++ test/Transforms/LoopUnroll/runtime-loop.ll @@ -1,8 +1,8 @@ ; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG ; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG -; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG -; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG +; RUN: opt < %s -S -passes='require,unroll' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG +; RUN: opt < %s -S -passes='require,unroll' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" Index: test/Transforms/LoopUnroll/runtime-loop1.ll =================================================================== --- test/Transforms/LoopUnroll/runtime-loop1.ll +++ test/Transforms/LoopUnroll/runtime-loop1.ll @@ -1,8 +1,8 @@ ; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG ; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG -; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG -; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG +; 
RUN: opt < %s -S -passes='require,unroll' -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG +; RUN: opt < %s -S -passes='require,unroll' -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG ; This tests that setting the unroll count works Index: test/Transforms/LoopUnroll/runtime-loop2.ll =================================================================== --- test/Transforms/LoopUnroll/runtime-loop2.ll +++ test/Transforms/LoopUnroll/runtime-loop2.ll @@ -1,8 +1,8 @@ ; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=true -unroll-count=8 | FileCheck %s -check-prefix=EPILOG ; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG -; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=true -unroll-count=8 | FileCheck %s -check-prefix=EPILOG -; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG +; RUN: opt < %s -S -passes='require,unroll' -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=true -unroll-count=8 | FileCheck %s -check-prefix=EPILOG +; RUN: opt < %s -S -passes='require,unroll' -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG ; Choose a smaller, power-of-two, unroll count if the loop is too large. 
; This test makes sure we're not unrolling 'odd' counts Index: test/Transforms/LoopUnroll/runtime-loop3.ll =================================================================== --- test/Transforms/LoopUnroll/runtime-loop3.ll +++ test/Transforms/LoopUnroll/runtime-loop3.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts ; RUN: opt < %s -disable-output -stats -loop-unroll -unroll-runtime -unroll-partial-threshold=200 -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS -; RUN: opt < %s -disable-output -stats -passes='require,loop(unroll)' -unroll-runtime -unroll-partial-threshold=200 -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS +; RUN: opt < %s -disable-output -stats -passes='require,unroll' -unroll-runtime -unroll-partial-threshold=200 -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS ; Test that nested loops can be unrolled. We need to increase threshold to do it Index: test/Transforms/LoopUnroll/runtime-loop5.ll =================================================================== --- test/Transforms/LoopUnroll/runtime-loop5.ll +++ test/Transforms/LoopUnroll/runtime-loop5.ll @@ -1,8 +1,8 @@ ; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=16 | FileCheck --check-prefix=UNROLL-16 %s ; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=4 | FileCheck --check-prefix=UNROLL-4 %s -; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-runtime=true -unroll-count=16 | FileCheck --check-prefix=UNROLL-16 %s -; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-runtime=true -unroll-count=4 | FileCheck --check-prefix=UNROLL-4 %s +; RUN: opt < %s -S -passes='require,unroll' -unroll-runtime=true -unroll-count=16 | FileCheck --check-prefix=UNROLL-16 %s +; RUN: opt < %s -S -passes='require,unroll' -unroll-runtime=true -unroll-count=4 | FileCheck --check-prefix=UNROLL-4 %s ; Given that the trip-count of this loop is a 3-bit value, we cannot ; safely unroll it with a count 
of anything more than 8. Index: test/Transforms/LoopUnroll/unloop.ll =================================================================== --- test/Transforms/LoopUnroll/unloop.ll +++ test/Transforms/LoopUnroll/unloop.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -S -loop-unroll -verify-loop-info | FileCheck %s -; RUN: opt < %s -S -passes='require,loop(unroll),verify' | FileCheck %s +; RUN: opt < %s -S -passes='require,unroll,verify' | FileCheck %s ; ; Unit tests for LoopInfo::markAsRemoved. Index: test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll =================================================================== --- test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll +++ test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll @@ -1,5 +1,5 @@ ; RUN: opt -S < %s -loop-unroll -block-freq | FileCheck %s -; RUN: opt -S < %s -passes='require,loop(unroll),require' | FileCheck %s +; RUN: opt -S < %s -passes='require,unroll,require' | FileCheck %s ; Crasher from PR20987. ; CHECK: define void @update_loop_info_in_subloops