diff --git a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp --- a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp @@ -151,14 +151,6 @@ if (!BTC->isZero()) return LoopDeletionResult::Unmodified; - // For non-outermost loops, the tricky case is that we can drop blocks - // out of both inner and outer loops at the same time. This results in - // new exiting block for the outer loop appearing, and possibly needing - // an lcssa phi inserted. (See loop_nest_lcssa test case in zero-btc.ll) - // TODO: We can handle a bunch of cases here without much work, revisit. - if (!L->isOutermost()) - return LoopDeletionResult::Unmodified; - breakLoopBackedge(L, DT, SE, LI, MSSA); return LoopDeletionResult::Deleted; } diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -761,13 +761,18 @@ } } +static Loop *getOutermostLoop(Loop *L) { + while (Loop *Parent = L->getParentLoop()) + L = Parent; + return L; +} + void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, MemorySSA *MSSA) { - - assert(L->isOutermost() && "Can't yet preserve LCSSA for this case"); auto *Latch = L->getLoopLatch(); assert(Latch && "multiple latches not yet supported"); auto *Header = L->getHeader(); + Loop *OutermostLoop = getOutermostLoop(L); SE.forgetLoop(L); @@ -790,6 +795,14 @@ // Erase (and destroy) this loop instance. Handles relinking sub-loops // and blocks within the loop as needed. LI.erase(L); + + // If the loop we broke had a parent, then changeToUnreachable might have + // caused a block to be removed from the parent loop (see loop_nest_lcssa + // test case in zero-btc.ll for an example), thus changing the parent's + // exit blocks. If that happened, we need to rebuild LCSSA on the outermost + // loop which might have had a block removed. 
+ if (OutermostLoop != L) + formLCSSARecursively(*OutermostLoop, DT, &LI, &SE); } diff --git a/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll b/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll --- a/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll @@ -23,8 +23,8 @@ ; CHECK-NEXT: [[I6:%.*]] = load i32, i32* @a, align 4 ; CHECK-NEXT: [[I24:%.*]] = load i32, i32* @b, align 4 ; CHECK-NEXT: [[D_PROMOTED9:%.*]] = load i32, i32* @d, align 4 -; CHECK-NEXT: br label [[BB1:%.*]] -; CHECK: bb1: +; CHECK-NEXT: br label [[BB13_PREHEADER:%.*]] +; CHECK: bb13.preheader: ; CHECK-NEXT: [[I8_LCSSA10:%.*]] = phi i32 [ [[D_PROMOTED9]], [[BB:%.*]] ], [ [[I8:%.*]], [[BB19_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I8]] = and i32 [[I8_LCSSA10]], [[I6]] ; CHECK-NEXT: [[I21:%.*]] = icmp eq i32 [[I8]], 0 @@ -33,7 +33,7 @@ ; CHECK-NEXT: [[I26:%.*]] = urem i32 [[I24]], [[I8]] ; CHECK-NEXT: store i32 [[I26]], i32* @e, align 4 ; CHECK-NEXT: [[I30_NOT:%.*]] = icmp eq i32 [[I26]], 0 -; CHECK-NEXT: br i1 [[I30_NOT]], label [[BB32_LOOPEXIT:%.*]], label [[BB1]] +; CHECK-NEXT: br i1 [[I30_NOT]], label [[BB32_LOOPEXIT:%.*]], label [[BB13_PREHEADER]] ; CHECK: bb13.preheader.bb27.thread.split_crit_edge: ; CHECK-NEXT: store i32 -1, i32* @f, align 4 ; CHECK-NEXT: store i32 0, i32* @d, align 4 diff --git a/llvm/test/Transforms/LoopDeletion/zero-btc.ll b/llvm/test/Transforms/LoopDeletion/zero-btc.ll --- a/llvm/test/Transforms/LoopDeletion/zero-btc.ll +++ b/llvm/test/Transforms/LoopDeletion/zero-btc.ll @@ -302,7 +302,9 @@ ; CHECK-NEXT: br label [[INNER:%.*]] ; CHECK: inner: ; CHECK-NEXT: store i32 0, i32* @G, align 4 -; CHECK-NEXT: br i1 false, label [[INNER]], label [[LATCH]] +; CHECK-NEXT: br i1 false, label [[INNER_INNER_CRIT_EDGE:%.*]], label [[LATCH]] +; CHECK: inner.inner_crit_edge: +; CHECK-NEXT: unreachable ; CHECK: latch: ; CHECK-NEXT: store i32 [[IV]], i32* @G, align 4 ; CHECK-NEXT: [[IV_INC]] = add i32 [[IV]], 1 @@ -346,11 +348,14 @@ ; CHECK: 
inner_header: ; CHECK-NEXT: br i1 false, label [[INNER_LATCH:%.*]], label [[OUTER_LATCH:%.*]] ; CHECK: inner_latch: -; CHECK-NEXT: br i1 false, label [[INNER_HEADER]], label [[LOOPEXIT:%.*]] +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP0]], [[INNER_HEADER]] ] +; CHECK-NEXT: br i1 false, label [[INNER_LATCH_INNER_HEADER_CRIT_EDGE:%.*]], label [[LOOPEXIT:%.*]] +; CHECK: inner_latch.inner_header_crit_edge: +; CHECK-NEXT: unreachable ; CHECK: outer_latch: ; CHECK-NEXT: br label [[OUTER_HEADER]] ; CHECK: loopexit: -; CHECK-NEXT: [[DOTLCSSA32:%.*]] = phi i32 [ [[TMP0]], [[INNER_LATCH]] ] +; CHECK-NEXT: [[DOTLCSSA32:%.*]] = phi i32 [ [[DOTLCSSA]], [[INNER_LATCH]] ] ; CHECK-NEXT: unreachable ; entry: