diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -179,6 +179,12 @@
 void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
                     LoopInfo *LI, MemorySSA *MSSA = nullptr);
 
+/// Remove the backedge of \p L and erase \p L from \p LI.  Handles loop nests
+/// and general loop structures, provided the loop has a single latch block.
+/// Preserves all listed analyses; \p MSSA may be null.
+void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
+                       LoopInfo &LI, MemorySSA *MSSA);
+
 /// Try to promote memory values to scalars by sinking stores out of
 /// the loop and moving loads to before the loop. We do this by looping over
 /// the stores in the loop, looking for stores to Must pointers which are
diff --git a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
--- a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
+
 using namespace llvm;
 
 #define DEBUG_TYPE "loop-delete"
@@ -38,6 +39,14 @@
   Deleted,
 };
 
+static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B) {
+  if (A == LoopDeletionResult::Deleted || B == LoopDeletionResult::Deleted)
+    return LoopDeletionResult::Deleted;
+  if (A == LoopDeletionResult::Modified || B == LoopDeletionResult::Modified)
+    return LoopDeletionResult::Modified;
+  return LoopDeletionResult::Unmodified;
+}
+
 /// Determines if a loop is dead.
 ///
 /// This assumes that we've already checked for unique exit and exiting blocks,
@@ -126,6 +135,26 @@
   return true;
 }
 
+/// If the backedge of \p L is provably never taken, remove it.  This destroys
+/// the loop, but leaves its (now trivially loop invariant) control flow and
+/// side effects in place.  Returns Deleted if the backedge was broken.
+static LoopDeletionResult
+breakBackedgeIfNotTaken(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
+                        LoopInfo &LI, MemorySSA *MSSA,
+                        OptimizationRemarkEmitter &ORE) {
+  assert(L->isLCSSAForm(DT) && "Expected LCSSA!");
+
+  if (!L->getLoopLatch())
+    return LoopDeletionResult::Unmodified;
+
+  auto *BTC = SE.getBackedgeTakenCount(L);
+  if (!BTC->isZero())
+    return LoopDeletionResult::Unmodified;
+
+  breakLoopBackedge(L, DT, SE, LI, MSSA);
+  return LoopDeletionResult::Deleted;
+}
+
 /// Remove a loop if it is dead.
 ///
 /// A loop is considered dead if it does not impact the observable behavior of
@@ -162,7 +191,6 @@
     return LoopDeletionResult::Unmodified;
   }
 
-
   BasicBlock *ExitBlock = L->getUniqueExitBlock();
 
   if (ExitBlock && isLoopNeverExecuted(L)) {
@@ -240,6 +268,14 @@
   // but ORE cannot be preserved (see comment before the pass definition).
   OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
   auto Result = deleteLoopIfDead(&L, AR.DT, AR.SE, AR.LI, AR.MSSA, ORE);
+
+  // If we can prove the backedge isn't taken, just break it and be done.  The
+  // loop body's control flow is left in place, so it still dispatches to the
+  // right exit based on whatever loop invariant structure remains.
+  if (Result != LoopDeletionResult::Deleted)
+    Result = merge(Result, breakBackedgeIfNotTaken(&L, AR.DT, AR.SE, AR.LI,
+                                                   AR.MSSA, ORE));
+
   if (Result == LoopDeletionResult::Unmodified)
     return PreservedAnalyses::all();
 
@@ -299,6 +335,12 @@
 
   LoopDeletionResult Result = deleteLoopIfDead(L, DT, SE, LI, MSSA, ORE);
 
+  // If we can prove the backedge isn't taken, just break it and be done.  The
+  // loop body's control flow is left in place, so it still dispatches to the
+  // right exit based on whatever loop invariant structure remains.
+  if (Result != LoopDeletionResult::Deleted)
+    Result = merge(Result, breakBackedgeIfNotTaken(L, DT, SE, LI, MSSA, ORE));
+
   if (Result == LoopDeletionResult::Deleted)
     LPM.markLoopAsDeleted(*L);
 
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -756,6 +756,37 @@
   }
 }
 
+void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
+                             LoopInfo &LI, MemorySSA *MSSA) {
+  auto *Latch = L->getLoopLatch();
+  assert(Latch && "breakLoopBackedge requires a single latch block");
+  auto *Header = L->getHeader();
+
+  // Invalidate SCEV's cached results for L before its CFG is changed.
+  SE.forgetLoop(L);
+
+  // Note: By splitting the backedge, and then explicitly making it unreachable
+  // we gracefully handle corner cases such as non-bottom tested loops and the
+  // like. We also have the benefit of being able to reuse existing well tested
+  // code. It might be worth special casing the common bottom tested case at
+  // some point to avoid code churn.
+
+  // MSSA is optional; only build an updater when there is something to update.
+  std::unique_ptr<MemorySSAUpdater> MSSAU;
+  if (MSSA)
+    MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
+
+  auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());
+
+  DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
+  (void)changeToUnreachable(BackedgeBB->getTerminator(), /*UseTrap*/false,
+                            /*PreserveLCSSA*/true, &DTU, MSSAU.get());
+
+  // Erase (and destroy) this loop instance. Handles relinking sub-loops
+  // and blocks within the loop as needed.
+  LI.erase(L);
+}
+
 /// Checks if \p L has single exit through latch block except possibly
 /// "deoptimizing" exits. Returns branch instruction terminating the loop
 /// latch if above check is successful, nullptr otherwise.
diff --git a/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll b/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll
--- a/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll
+++ b/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll
@@ -23,8 +23,8 @@
 ; CHECK-NEXT:    [[I6:%.*]] = load i32, i32* @a, align 4
 ; CHECK-NEXT:    [[I24:%.*]] = load i32, i32* @b, align 4
 ; CHECK-NEXT:    [[D_PROMOTED9:%.*]] = load i32, i32* @d, align 4
-; CHECK-NEXT:    br label [[BB1:%.*]]
-; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB13_PREHEADER:%.*]]
+; CHECK:       bb13.preheader:
 ; CHECK-NEXT:    [[I8_LCSSA10:%.*]] = phi i32 [ [[D_PROMOTED9]], [[BB:%.*]] ], [ [[I8:%.*]], [[BB19_PREHEADER:%.*]] ]
 ; CHECK-NEXT:    [[I8]] = and i32 [[I8_LCSSA10]], [[I6]]
 ; CHECK-NEXT:    [[I21:%.*]] = icmp eq i32 [[I8]], 0
@@ -33,7 +33,7 @@
 ; CHECK-NEXT:    [[I26:%.*]] = urem i32 [[I24]], [[I8]]
 ; CHECK-NEXT:    store i32 [[I26]], i32* @e, align 4
 ; CHECK-NEXT:    [[I30_NOT:%.*]] = icmp eq i32 [[I26]], 0
-; CHECK-NEXT:    br i1 [[I30_NOT]], label [[BB32_LOOPEXIT:%.*]], label [[BB1]]
+; CHECK-NEXT:    br i1 [[I30_NOT]], label [[BB32_LOOPEXIT:%.*]], label [[BB13_PREHEADER]]
 ; CHECK:       bb13.preheader.bb27.thread.split_crit_edge:
 ; CHECK-NEXT:    store i32 -1, i32* @f, align 4
 ; CHECK-NEXT:    store i32 0, i32* @d, align 4
diff --git a/llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll b/llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll
--- a/llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll
+++ b/llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll
@@ -89,8 +89,10 @@
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[UNKNOWN_NEXT:%.*]] = load volatile i32, i32* [[UNKNOWN_MEM:%.*]]
-; CHECK-NEXT:    br i1 false, label [[LOOP]], label [[LEAVE:%.*]]
+; CHECK-NEXT:    [[UNKNOWN_NEXT:%.*]] = load volatile i32, i32* [[UNKNOWN_MEM:%.*]], align 4
+; CHECK-NEXT:    br i1 false, label [[LOOP_LOOP_CRIT_EDGE:%.*]], label [[LEAVE:%.*]]
+; CHECK:       loop.loop_crit_edge:
+; CHECK-NEXT:    unreachable
 ; CHECK:       leave:
 ; CHECK-NEXT:    ret i32 [[UNKNOWN_INIT:%.*]]
 ;
diff --git a/llvm/test/Transforms/LoopDeletion/update-scev.ll b/llvm/test/Transforms/LoopDeletion/update-scev.ll
--- a/llvm/test/Transforms/LoopDeletion/update-scev.ll
+++ b/llvm/test/Transforms/LoopDeletion/update-scev.ll
@@ -44,7 +44,8 @@
   %conv10 = zext i1 %cmp9 to i32
   %and = and i32 %conv10, %g.138
   %inc = add i32 %h.039, 1
-  br i1 undef, label %for.inc11, label %for.body6
+  %exit = icmp eq i32 %inc, 20000
+  br i1 %exit, label %for.inc11, label %for.body6
 
 for.inc11:                                        ; preds = %for.body6
   %and.lcssa = phi i32 [ %and, %for.body6 ]
diff --git a/llvm/test/Transforms/LoopDeletion/zero-btc.ll b/llvm/test/Transforms/LoopDeletion/zero-btc.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopDeletion/zero-btc.ll
@@ -0,0 +1,335 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -loop-deletion -S | FileCheck %s
+
+@G = external global i32
+
+define void @test_trivial() {
+; CHECK-LABEL: @test_trivial(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    store i32 0, i32* @G, align 4
+; CHECK-NEXT:    br i1 false, label [[LOOP_LOOP_CRIT_EDGE:%.*]], label [[EXIT:%.*]]
+; CHECK:       loop.loop_crit_edge:
+; CHECK-NEXT:    unreachable
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  store i32 0, i32* @G
+  br i1 false, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+
+define void @test_bottom_tested() {
+; CHECK-LABEL: @test_bottom_tested(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    store i32 0, i32* @G, align 4
+; CHECK-NEXT:    [[IV_INC:%.*]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[BE_TAKEN:%.*]] = icmp ne i32 [[IV_INC]], 1
+; CHECK-NEXT:    br i1 [[BE_TAKEN]], label [[LOOP_LOOP_CRIT_EDGE:%.*]], label [[EXIT:%.*]]
+; CHECK:       loop.loop_crit_edge:
+; CHECK-NEXT:    unreachable
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry], [ %iv.inc, %loop ]
+  store i32 0, i32* @G
+  %iv.inc = add i32 %iv, 1
+  %be_taken = icmp ne i32 %iv.inc, 1
+  br i1 %be_taken, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+define void @test_early_exit() {
+; CHECK-LABEL: @test_early_exit(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    store i32 0, i32* @G, align 4
+; CHECK-NEXT:    [[IV_INC:%.*]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[BE_TAKEN:%.*]] = icmp ne i32 [[IV_INC]], 1
+; CHECK-NEXT:    br i1 [[BE_TAKEN]], label [[LATCH:%.*]], label [[EXIT:%.*]]
+; CHECK:       latch:
+; CHECK-NEXT:    br label [[LATCH_SPLIT:%.*]]
+; CHECK:       latch.split:
+; CHECK-NEXT:    unreachable
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry], [ %iv.inc, %latch ]
+  store i32 0, i32* @G
+  %iv.inc = add i32 %iv, 1
+  %be_taken = icmp ne i32 %iv.inc, 1
+  br i1 %be_taken, label %latch, label %exit
+latch:
+  br label %loop
+
+exit:
+  ret void
+}
+
+define void @test_multi_exit1() {
+; CHECK-LABEL: @test_multi_exit1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    store i32 0, i32* @G, align 4
+; CHECK-NEXT:    [[IV_INC:%.*]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[BE_TAKEN:%.*]] = icmp ne i32 [[IV_INC]], 1
+; CHECK-NEXT:    br i1 [[BE_TAKEN]], label [[LATCH:%.*]], label [[EXIT:%.*]]
+; CHECK:       latch:
+; CHECK-NEXT:    store i32 1, i32* @G, align 4
+; CHECK-NEXT:    [[COND2:%.*]] = icmp ult i32 [[IV_INC]], 30
+; CHECK-NEXT:    br i1 [[COND2]], label [[LATCH_LOOP_CRIT_EDGE:%.*]], label [[EXIT]]
+; CHECK:       latch.loop_crit_edge:
+; CHECK-NEXT:    unreachable
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry], [ %iv.inc, %latch ]
+  store i32 0, i32* @G
+  %iv.inc = add i32 %iv, 1
+  %be_taken = icmp ne i32 %iv.inc, 1
+  br i1 %be_taken, label %latch, label %exit
+latch:
+  store i32 1, i32* @G
+  %cond2 = icmp ult i32 %iv.inc, 30
+  br i1 %cond2, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+define void @test_multi_exit2() {
+; CHECK-LABEL: @test_multi_exit2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    store i32 0, i32* @G, align 4
+; CHECK-NEXT:    br i1 true, label [[LATCH:%.*]], label [[EXIT:%.*]]
+; CHECK:       latch:
+; CHECK-NEXT:    store i32 1, i32* @G, align 4
+; CHECK-NEXT:    br i1 false, label [[LATCH_LOOP_CRIT_EDGE:%.*]], label [[EXIT]]
+; CHECK:       latch.loop_crit_edge:
+; CHECK-NEXT:    unreachable
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  store i32 0, i32* @G
+  br i1 true, label %latch, label %exit
+latch:
+  store i32 1, i32* @G
+  br i1 false, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; TODO: SCEV fails to recognize this as a zero-BTC loop, so nothing changes.
+define void @test_multi_exit3(i1 %cond1) {
+; CHECK-LABEL: @test_multi_exit3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    store i32 0, i32* @G, align 4
+; CHECK-NEXT:    br i1 [[COND1:%.*]], label [[LATCH]], label [[EXIT:%.*]]
+; CHECK:       latch:
+; CHECK-NEXT:    store i32 1, i32* @G, align 4
+; CHECK-NEXT:    [[IV_INC]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[BE_TAKEN:%.*]] = icmp ne i32 [[IV_INC]], 1
+; CHECK-NEXT:    br i1 [[BE_TAKEN]], label [[LOOP]], label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry], [ %iv.inc, %latch ]
+  store i32 0, i32* @G
+  br i1 %cond1, label %latch, label %exit
+latch:
+  store i32 1, i32* @G
+  %iv.inc = add i32 %iv, 1
+  %be_taken = icmp ne i32 %iv.inc, 1
+  br i1 %be_taken, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Subtle - depending on the loop-invariant conditions this loop has either a
+; zero BTC or runs forever, so the backedge can't be broken.
+define void @test_multi_exit4(i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @test_multi_exit4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    store i32 0, i32* @G, align 4
+; CHECK-NEXT:    br i1 [[COND1:%.*]], label [[LATCH:%.*]], label [[EXIT:%.*]]
+; CHECK:       latch:
+; CHECK-NEXT:    store i32 1, i32* @G, align 4
+; CHECK-NEXT:    br i1 [[COND2:%.*]], label [[LOOP]], label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  store i32 0, i32* @G
+  br i1 %cond1, label %latch, label %exit
+latch:
+  store i32 1, i32* @G
+  br i1 %cond2, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; A simple zero-BTC case whose two exits lead to distinct exit blocks.
+define void @test_multi_exit5() {
+; CHECK-LABEL: @test_multi_exit5(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    store i32 0, i32* @G, align 4
+; CHECK-NEXT:    br i1 true, label [[LATCH:%.*]], label [[EXIT1:%.*]]
+; CHECK:       latch:
+; CHECK-NEXT:    store i32 1, i32* @G, align 4
+; CHECK-NEXT:    br i1 false, label [[LATCH_LOOP_CRIT_EDGE:%.*]], label [[EXIT2:%.*]]
+; CHECK:       latch.loop_crit_edge:
+; CHECK-NEXT:    unreachable
+; CHECK:       exit1:
+; CHECK-NEXT:    ret void
+; CHECK:       exit2:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  store i32 0, i32* @G
+  br i1 true, label %latch, label %exit1
+latch:
+  store i32 1, i32* @G
+  br i1 false, label %loop, label %exit2
+
+exit1:
+  ret void
+exit2:
+  ret void
+}
+
+define void @test_live_inner() {
+; CHECK-LABEL: @test_live_inner(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    store i32 0, i32* @G, align 4
+; CHECK-NEXT:    br label [[INNER:%.*]]
+; CHECK:       inner:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[IV_INC:%.*]], [[INNER]] ]
+; CHECK-NEXT:    store i32 [[IV]], i32* @G, align 4
+; CHECK-NEXT:    [[IV_INC]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[CND:%.*]] = icmp ult i32 [[IV_INC]], 200
+; CHECK-NEXT:    br i1 [[CND]], label [[INNER]], label [[LATCH:%.*]]
+; CHECK:       latch:
+; CHECK-NEXT:    br i1 false, label [[LATCH_LOOP_CRIT_EDGE:%.*]], label [[EXIT:%.*]]
+; CHECK:       latch.loop_crit_edge:
+; CHECK-NEXT:    unreachable
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  store i32 0, i32* @G
+  br label %inner
+
+inner:
+  %iv = phi i32 [0, %loop], [%iv.inc, %inner]
+  store i32 %iv, i32* @G
+  %iv.inc = add i32 %iv, 1
+  %cnd = icmp ult i32 %iv.inc, 200
+  br i1 %cnd, label %inner, label %latch
+
+latch:
+  br i1 false, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+define void @test_live_outer() {
+; CHECK-LABEL: @test_live_outer(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    br label [[INNER:%.*]]
+; CHECK:       inner:
+; CHECK-NEXT:    store i32 0, i32* @G, align 4
+; CHECK-NEXT:    br i1 false, label [[INNER_INNER_CRIT_EDGE:%.*]], label [[LATCH]]
+; CHECK:       inner.inner_crit_edge:
+; CHECK-NEXT:    unreachable
+; CHECK:       latch:
+; CHECK-NEXT:    store i32 [[IV]], i32* @G, align 4
+; CHECK-NEXT:    [[IV_INC]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[CND:%.*]] = icmp ult i32 [[IV_INC]], 200
+; CHECK-NEXT:    br i1 [[CND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [0, %entry], [%iv.inc, %latch]
+  br label %inner
+
+inner:
+  store i32 0, i32* @G
+  br i1 false, label %inner, label %latch
+
+latch:
+  store i32 %iv, i32* @G
+  %iv.inc = add i32 %iv, 1
+  %cnd = icmp ult i32 %iv.inc, 200
+  br i1 %cnd, label %loop, label %exit
+
+exit:
+  ret void
+}