Index: llvm/include/llvm/Transforms/Utils/LoopUtils.h
===================================================================
--- llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -179,6 +179,12 @@
 void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
                     LoopInfo *LI, MemorySSA *MSSA = nullptr);
 
+/// Remove the backedge of the specified loop. Handles loop nests and general
+/// loop structures subject to the precondition that the loop has a single
+/// latch block. Preserves all listed analyses.
+void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
+                       LoopInfo &LI, MemorySSA *MSSA);
+
 /// Try to promote memory values to scalars by sinking stores out of
 /// the loop and moving loads to before the loop. We do this by looping over
 /// the stores in the loop, looking for stores to Must pointers which are
Index: llvm/lib/Transforms/Scalar/LoopDeletion.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
+
 using namespace llvm;
 
 #define DEBUG_TYPE "loop-delete"
@@ -38,6 +39,16 @@
   Deleted,
 };
 
+/// Combine the results of two transforms, keeping the strongest outcome:
+/// Deleted takes precedence over Modified, which beats Unmodified.
+static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B) {
+  if (A == LoopDeletionResult::Deleted || B == LoopDeletionResult::Deleted)
+    return LoopDeletionResult::Deleted;
+  if (A == LoopDeletionResult::Modified || B == LoopDeletionResult::Modified)
+    return LoopDeletionResult::Modified;
+  return LoopDeletionResult::Unmodified;
+}
+
 /// Determines if a loop is dead.
 ///
 /// This assumes that we've already checked for unique exit and exiting blocks,
@@ -126,6 +137,26 @@
   return true;
 }
 
+/// If we can prove the backedge is untaken, remove it. This destroys the
+/// loop, but leaves the (now trivially loop invariant) control flow and
+/// side effects (if any) in place.
+static LoopDeletionResult
+breakBackedgeIfNotTaken(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
+                        LoopInfo &LI, MemorySSA *MSSA,
+                        OptimizationRemarkEmitter &ORE) {
+  assert(L->isLCSSAForm(DT) && "Expected LCSSA!");
+
+  if (!L->getLoopLatch())
+    return LoopDeletionResult::Unmodified;
+
+  auto *BTC = SE.getBackedgeTakenCount(L);
+  if (!BTC->isZero())
+    return LoopDeletionResult::Unmodified;
+
+  breakLoopBackedge(L, DT, SE, LI, MSSA);
+  return LoopDeletionResult::Deleted;
+}
+
 /// Remove a loop if it is dead.
 ///
 /// A loop is considered dead either if it does not impact the observable
@@ -236,6 +267,14 @@
   // but ORE cannot be preserved (see comment before the pass definition).
   OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
   auto Result = deleteLoopIfDead(&L, AR.DT, AR.SE, AR.LI, AR.MSSA, ORE);
+
+  // If we can prove the backedge isn't taken, just break it and be done. The
+  // loop's blocks and branches stay in place, so dispatching to the right
+  // exit is still handled by whatever loop invariant control flow remains.
+  if (Result != LoopDeletionResult::Deleted)
+    Result = merge(Result, breakBackedgeIfNotTaken(&L, AR.DT, AR.SE, AR.LI,
+                                                   AR.MSSA, ORE));
+
   if (Result == LoopDeletionResult::Unmodified)
     return PreservedAnalyses::all();
 
@@ -295,6 +334,12 @@
 
   LoopDeletionResult Result = deleteLoopIfDead(L, DT, SE, LI, MSSA, ORE);
 
+  // If we can prove the backedge isn't taken, just break it and be done. The
+  // loop's blocks and branches stay in place, so dispatching to the right
+  // exit is still handled by whatever loop invariant control flow remains.
+  if (Result != LoopDeletionResult::Deleted)
+    Result = merge(Result, breakBackedgeIfNotTaken(L, DT, SE, LI, MSSA, ORE));
+
   if (Result == LoopDeletionResult::Deleted)
     LPM.markLoopAsDeleted(*L);
 
Index: llvm/lib/Transforms/Utils/LoopUtils.cpp
===================================================================
--- llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -762,6 +762,37 @@
   }
 }
 
+void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
+                             LoopInfo &LI, MemorySSA *MSSA) {
+
+  auto *Latch = L->getLoopLatch();
+  assert(Latch && "Expected the loop to have a single latch block");
+  auto *Header = L->getHeader();
+
+  SE.forgetLoop(L);
+
+  // Note: By splitting the backedge, and then explicitly making it unreachable
+  // we gracefully handle corner cases such as non-bottom tested loops and the
+  // like. We also have the benefit of being able to reuse existing well tested
+  // code. It might be worth special casing the common bottom tested case at
+  // some point to avoid code churn.
+
+  std::unique_ptr<MemorySSAUpdater> MSSAU;
+  if (MSSA)
+    MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
+
+  auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());
+
+  DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
+  (void)changeToUnreachable(BackedgeBB->getTerminator(), /*UseTrap*/false,
+                            /*PreserveLCSSA*/true, &DTU, MSSAU.get());
+
+  // Erase (and destroy) this loop instance. Handles relinking sub-loops
+  // and blocks within the loop as needed.
+  LI.erase(L);
+}
+
+
 /// Checks if \p L has single exit through latch block except possibly
 /// "deoptimizing" exits. Returns branch instruction terminating the loop
 /// latch if above check is successful, nullptr otherwise.
Index: llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll +++ llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll @@ -23,8 +23,8 @@ ; CHECK-NEXT: [[I6:%.*]] = load i32, i32* @a, align 4 ; CHECK-NEXT: [[I24:%.*]] = load i32, i32* @b, align 4 ; CHECK-NEXT: [[D_PROMOTED9:%.*]] = load i32, i32* @d, align 4 -; CHECK-NEXT: br label [[BB1:%.*]] -; CHECK: bb1: +; CHECK-NEXT: br label [[BB13_PREHEADER:%.*]] +; CHECK: bb13.preheader: ; CHECK-NEXT: [[I8_LCSSA10:%.*]] = phi i32 [ [[D_PROMOTED9]], [[BB:%.*]] ], [ [[I8:%.*]], [[BB19_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I8]] = and i32 [[I8_LCSSA10]], [[I6]] ; CHECK-NEXT: [[I21:%.*]] = icmp eq i32 [[I8]], 0 @@ -33,7 +33,7 @@ ; CHECK-NEXT: [[I26:%.*]] = urem i32 [[I24]], [[I8]] ; CHECK-NEXT: store i32 [[I26]], i32* @e, align 4 ; CHECK-NEXT: [[I30_NOT:%.*]] = icmp eq i32 [[I26]], 0 -; CHECK-NEXT: br i1 [[I30_NOT]], label [[BB32_LOOPEXIT:%.*]], label [[BB1]] +; CHECK-NEXT: br i1 [[I30_NOT]], label [[BB32_LOOPEXIT:%.*]], label [[BB13_PREHEADER]] ; CHECK: bb13.preheader.bb27.thread.split_crit_edge: ; CHECK-NEXT: store i32 -1, i32* @f, align 4 ; CHECK-NEXT: store i32 0, i32* @d, align 4 Index: llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll +++ llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll @@ -89,8 +89,10 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[UNKNOWN_NEXT:%.*]] = load volatile i32, i32* [[UNKNOWN_MEM:%.*]] -; CHECK-NEXT: br i1 false, label [[LOOP]], label [[LEAVE:%.*]] +; CHECK-NEXT: [[UNKNOWN_NEXT:%.*]] = load volatile i32, i32* [[UNKNOWN_MEM:%.*]], align 4 +; CHECK-NEXT: br i1 false, label [[LOOP_LOOP_CRIT_EDGE:%.*]], label [[LEAVE:%.*]] +; CHECK: loop.loop_crit_edge: +; CHECK-NEXT: unreachable ; CHECK: leave: ; 
CHECK-NEXT: ret i32 [[UNKNOWN_INIT:%.*]] ; Index: llvm/test/Transforms/LoopDeletion/update-scev.ll =================================================================== --- llvm/test/Transforms/LoopDeletion/update-scev.ll +++ llvm/test/Transforms/LoopDeletion/update-scev.ll @@ -44,7 +44,8 @@ %conv10 = zext i1 %cmp9 to i32 %and = and i32 %conv10, %g.138 %inc = add i32 %h.039, 1 - br i1 undef, label %for.inc11, label %for.body6 + %exit = icmp eq i32 %inc, 20000 + br i1 %exit, label %for.inc11, label %for.body6 for.inc11: ; preds = %for.body6 %and.lcssa = phi i32 [ %and, %for.body6 ] Index: llvm/test/Transforms/LoopDeletion/zero-btc.ll =================================================================== --- llvm/test/Transforms/LoopDeletion/zero-btc.ll +++ llvm/test/Transforms/LoopDeletion/zero-btc.ll @@ -9,7 +9,9 @@ ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: store i32 0, i32* @G, align 4 -; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 false, label [[LOOP_LOOP_CRIT_EDGE:%.*]], label [[EXIT:%.*]] +; CHECK: loop.loop_crit_edge: +; CHECK-NEXT: unreachable ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -30,11 +32,13 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: store i32 0, i32* @G, align 4 -; CHECK-NEXT: [[IV_INC]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[IV_INC:%.*]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[BE_TAKEN:%.*]] = icmp ne i32 [[IV_INC]], 1 -; CHECK-NEXT: br i1 [[BE_TAKEN]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 [[BE_TAKEN]], label [[LOOP_LOOP_CRIT_EDGE:%.*]], label [[EXIT:%.*]] +; CHECK: loop.loop_crit_edge: +; CHECK-NEXT: unreachable ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -57,13 +61,15 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] 
], [ [[IV_INC:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: store i32 0, i32* @G, align 4 -; CHECK-NEXT: [[IV_INC]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[IV_INC:%.*]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[BE_TAKEN:%.*]] = icmp ne i32 [[IV_INC]], 1 -; CHECK-NEXT: br i1 [[BE_TAKEN]], label [[LATCH]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 [[BE_TAKEN]], label [[LATCH:%.*]], label [[EXIT:%.*]] ; CHECK: latch: -; CHECK-NEXT: br label [[LOOP]] +; CHECK-NEXT: br label [[LATCH_SPLIT:%.*]] +; CHECK: latch.split: +; CHECK-NEXT: unreachable ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -88,15 +94,17 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: store i32 0, i32* @G, align 4 -; CHECK-NEXT: [[IV_INC]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[IV_INC:%.*]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[BE_TAKEN:%.*]] = icmp ne i32 [[IV_INC]], 1 -; CHECK-NEXT: br i1 [[BE_TAKEN]], label [[LATCH]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 [[BE_TAKEN]], label [[LATCH:%.*]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: store i32 1, i32* @G, align 4 ; CHECK-NEXT: [[COND2:%.*]] = icmp ult i32 [[IV_INC]], 30 -; CHECK-NEXT: br i1 [[COND2]], label [[LOOP]], label [[EXIT]] +; CHECK-NEXT: br i1 [[COND2]], label [[LATCH_LOOP_CRIT_EDGE:%.*]], label [[EXIT]] +; CHECK: latch.loop_crit_edge: +; CHECK-NEXT: unreachable ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -127,7 +135,9 @@ ; CHECK-NEXT: br i1 true, label [[LATCH:%.*]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: store i32 1, i32* @G, align 4 -; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT]] +; CHECK-NEXT: br i1 false, label [[LATCH_LOOP_CRIT_EDGE:%.*]], label [[EXIT]] +; CHECK: latch.loop_crit_edge: +; CHECK-NEXT: unreachable ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -218,7 +228,9 @@ ; CHECK-NEXT: br i1 true, 
label [[LATCH:%.*]], label [[EXIT1:%.*]] ; CHECK: latch: ; CHECK-NEXT: store i32 1, i32* @G, align 4 -; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT2:%.*]] +; CHECK-NEXT: br i1 false, label [[LATCH_LOOP_CRIT_EDGE:%.*]], label [[EXIT2:%.*]] +; CHECK: latch.loop_crit_edge: +; CHECK-NEXT: unreachable ; CHECK: exit1: ; CHECK-NEXT: ret void ; CHECK: exit2: @@ -254,7 +266,9 @@ ; CHECK-NEXT: [[CND:%.*]] = icmp ult i32 [[IV_INC]], 200 ; CHECK-NEXT: br i1 [[CND]], label [[INNER]], label [[LATCH:%.*]] ; CHECK: latch: -; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 false, label [[LATCH_LOOP_CRIT_EDGE:%.*]], label [[EXIT:%.*]] +; CHECK: latch.loop_crit_edge: +; CHECK-NEXT: unreachable ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -288,7 +302,9 @@ ; CHECK-NEXT: br label [[INNER:%.*]] ; CHECK: inner: ; CHECK-NEXT: store i32 0, i32* @G, align 4 -; CHECK-NEXT: br i1 false, label [[INNER]], label [[LATCH]] +; CHECK-NEXT: br i1 false, label [[INNER_INNER_CRIT_EDGE:%.*]], label [[LATCH]] +; CHECK: inner.inner_crit_edge: +; CHECK-NEXT: unreachable ; CHECK: latch: ; CHECK-NEXT: store i32 [[IV]], i32* @G, align 4 ; CHECK-NEXT: [[IV_INC]] = add i32 [[IV]], 1