Index: lib/Transforms/Scalar/LoopPassManager.cpp =================================================================== --- lib/Transforms/Scalar/LoopPassManager.cpp +++ lib/Transforms/Scalar/LoopPassManager.cpp @@ -42,6 +42,13 @@ break; } +#ifndef NDEBUG + // Verify the loop structure and LCSSA form before visiting the loop. + L.verifyLoop(); + assert(L.isRecursivelyLCSSAForm(AR.DT, AR.LI) && + "Loops must remain in LCSSA form!"); +#endif + // Update the analysis manager as each pass runs and potentially // invalidates analyses. AM.invalidate(L, PassPA); Index: lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnrollPass.cpp +++ lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -1117,7 +1117,7 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, - LPMUpdater &) { + LPMUpdater &Updater) { const auto &FAM = AM.getResult(L, AR).getManager(); Function *F = L.getHeader()->getParent(); @@ -1128,6 +1128,15 @@ report_fatal_error("LoopUnrollPass: OptimizationRemarkEmitterAnalysis not " "cached at a higher level"); + // Keep track of the previous loop structure so we can identify new loops + // created by unrolling. + Loop *ParentL = L.getParentLoop(); + SmallPtrSet OldLoops; + if (ParentL) + OldLoops.insert(ParentL->begin(), ParentL->end()); + else + OldLoops.insert(AR.LI.begin(), AR.LI.end()); + bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, &AR.SE, AR.TTI, AR.AC, *ORE, /*PreserveLCSSA*/ true, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, @@ -1135,5 +1144,30 @@ if (!Changed) return PreservedAnalyses::all(); + // The parent must not be damaged by unrolling! + if (ParentL) + ParentL->verifyLoop(); + + // We need to update the loop pass manager, so we look to see if our loop + // still exists and enque any new loops into the worklist. + bool IsCurrentLoopValid = false; + SmallVector SibLoops; + if (ParentL) + SibLoops.append(ParentL->begin(), ParentL->end()); + else + SibLoops.append(AR.LI.begin(), AR.LI.end()); + erase_if(SibLoops, [&](Loop *SibLoop) { + if (SibLoop == &L) { + IsCurrentLoopValid = true; + return true; + } + + // Otherwise erase the loop from the list if it was in the old loops. + return OldLoops.count(SibLoop) != 0; + }); + Updater.addSiblingLoops(SibLoops); + if (!IsCurrentLoopValid) + Updater.markLoopAsDeleted(L); + return getLoopPassPreservedAnalyses(); } Index: test/Transforms/LoopUnroll/basic.ll =================================================================== --- test/Transforms/LoopUnroll/basic.ll +++ test/Transforms/LoopUnroll/basic.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -loop-unroll -S | FileCheck %s +; RUN: opt < %s -passes='require,loop(unroll)' -S | FileCheck %s ; This should not unroll since the address of the loop header is taken. Index: test/Transforms/LoopUnroll/full-unroll-bad-cost.ll =================================================================== --- test/Transforms/LoopUnroll/full-unroll-bad-cost.ll +++ test/Transforms/LoopUnroll/full-unroll-bad-cost.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -loop-unroll < %s | FileCheck %s +; RUN: opt < %s -passes='require,loop(unroll)' -S | FileCheck %s ; LLVM should not try to fully unroll this loop. Index: test/Transforms/LoopUnroll/full-unroll-crashers.ll =================================================================== --- test/Transforms/LoopUnroll/full-unroll-crashers.ll +++ test/Transforms/LoopUnroll/full-unroll-crashers.ll @@ -1,5 +1,6 @@ ; Check that we don't crash on corner cases. ; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=1 -unroll-max-percent-threshold-boost=200 -o /dev/null +; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=1 -unroll-max-percent-threshold-boost=200 -o /dev/null target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16 Index: test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll =================================================================== --- test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll +++ test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s +; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @unknown_global = internal unnamed_addr global [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16 Index: test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll =================================================================== --- test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll +++ test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s +; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16 Index: test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll =================================================================== --- test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll +++ test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=12 -unroll-max-percent-threshold-boost=400 | FileCheck %s +; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=12 -unroll-max-percent-threshold-boost=400 | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @known_constant = internal unnamed_addr constant [10 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0], align 16 Index: test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll =================================================================== --- test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll +++ test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s +; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; When examining gep-instructions we shouldn't consider them simplified if the Index: test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll =================================================================== --- test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll +++ test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s +; RUN: opt < %s -S -passes='require,loop(unroll)' -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" define i64 @propagate_loop_phis() { Index: test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll =================================================================== --- test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll +++ test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -loop-unroll < %s | FileCheck %s +; RUN: opt -S -passes='require,loop(unroll)' < %s | FileCheck %s ; Unroll twice, with first loop exit kept ; CHECK-LABEL: @s32_max1 Index: test/Transforms/LoopUnroll/runtime-loop.ll =================================================================== --- test/Transforms/LoopUnroll/runtime-loop.ll +++ test/Transforms/LoopUnroll/runtime-loop.ll @@ -1,6 +1,9 @@ ; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG ; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG +; RUN: opt < %s -S -passes='loop-simplify,require,loop(unroll)' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG +; RUN: opt < %s -S -passes='loop-simplify,require,loop(unroll)' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG + target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" ; Tests for unrolling loops with run-time trip counts Index: test/Transforms/LoopUnroll/runtime-loop1.ll =================================================================== --- test/Transforms/LoopUnroll/runtime-loop1.ll +++ test/Transforms/LoopUnroll/runtime-loop1.ll @@ -1,6 +1,9 @@ ; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG ; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG +; RUN: opt < %s -S -passes='loop-simplify,require,loop(unroll)' -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG +; RUN: opt < %s -S -passes='loop-simplify,require,loop(unroll)' -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG + ; This tests that setting the unroll count works Index: test/Transforms/LoopUnroll/runtime-loop2.ll =================================================================== --- test/Transforms/LoopUnroll/runtime-loop2.ll +++ test/Transforms/LoopUnroll/runtime-loop2.ll @@ -1,6 +1,9 @@ ; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=true -unroll-count=8 | FileCheck %s -check-prefix=EPILOG ; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG +; RUN: opt < %s -S -passes='loop-simplify,require,loop(unroll)' -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=true -unroll-count=8 | FileCheck %s -check-prefix=EPILOG +; RUN: opt < %s -S -passes='loop-simplify,require,loop(unroll)' -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG + ; Choose a smaller, power-of-two, unroll count if the loop is too large. ; This test makes sure we're not unrolling 'odd' counts Index: test/Transforms/LoopUnroll/runtime-loop3.ll =================================================================== --- test/Transforms/LoopUnroll/runtime-loop3.ll +++ test/Transforms/LoopUnroll/runtime-loop3.ll @@ -1,5 +1,6 @@ ; REQUIRES: asserts ; RUN: opt < %s -disable-output -stats -loop-unroll -unroll-runtime -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS +; RUN: opt < %s -disable-output -stats -passes='loop-simplify,require,loop(unroll)' -unroll-runtime -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS ; Test that nested loops can be unrolled. We need to increase threshold to do it Index: test/Transforms/LoopUnroll/runtime-loop5.ll =================================================================== --- test/Transforms/LoopUnroll/runtime-loop5.ll +++ test/Transforms/LoopUnroll/runtime-loop5.ll @@ -1,6 +1,9 @@ ; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=16 | FileCheck --check-prefix=UNROLL-16 %s ; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=4 | FileCheck --check-prefix=UNROLL-4 %s +; RUN: opt < %s -S -passes='loop-simplify,require,loop(unroll)' -unroll-runtime=true -unroll-count=16 | FileCheck --check-prefix=UNROLL-16 %s +; RUN: opt < %s -S -passes='loop-simplify,require,loop(unroll)' -unroll-runtime=true -unroll-count=4 | FileCheck --check-prefix=UNROLL-4 %s + ; Given that the trip-count of this loop is a 3-bit value, we cannot ; safely unroll it with a count of anything more than 8. Index: test/Transforms/LoopUnroll/unloop.ll =================================================================== --- test/Transforms/LoopUnroll/unloop.ll +++ test/Transforms/LoopUnroll/unloop.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -S -loop-unroll -verify-loop-info | FileCheck %s -; RUN: opt < %s -S -passes='function(require,require,require,loop(unroll),verify)' | FileCheck %s +; RUN: opt < %s -S -passes='require,loop(unroll),verify' | FileCheck %s ; ; Unit tests for LoopInfo::markAsRemoved. Index: test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll =================================================================== --- test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll +++ test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll @@ -1,4 +1,5 @@ ; RUN: opt -S < %s -loop-unroll -block-freq | FileCheck %s +; RUN: opt -S < %s -passes='require,loop(unroll),require' | FileCheck %s ; Crasher from PR20987. ; CHECK: define void @update_loop_info_in_subloops