Index: llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -423,14 +423,6 @@ if (!PreserveLCSSA) return false; - // FIXME: We bail out of multi-exit unrolling when epilog loop is generated - // and L is an inner loop. This is because in presence of multiple exits, the - // outer loop is incorrect: we do not add the EpilogPreheader and exit to the - // outer loop. This is automatically handled in the prolog case, so we do not - // have that bug in prolog generation. - if (UseEpilogRemainder && L->getParentLoop()) - return false; - // All constraints have been satisfied. return true; } @@ -456,6 +448,11 @@ if (!LatchExit->getSinglePredecessor()) return false; + // TODO: We used to bail out for correctness (now fixed). Under what + // circumstances is this case profitable to allow? + if (UseEpilogRemainder && L->getParentLoop()) + return false; + // The main pain point with multi-exit loop unrolling is that once unrolled, // we will not be able to merge all blocks into a straight line code. // There are branches within the unrolled loop that go to the OtherExits. @@ -729,6 +726,21 @@ // Split NewExit to insert epilog remainder loop. EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI); EpilogPreHeader->setName(Header->getName() + ".epil.preheader"); + + // If the latch exits from multiple level of nested loops, then + // by assumption there must be another loop exit which branches to the + // outer loop and we must adjust the loop for the newly inserted blocks + // to account for the fact that our epilogue is still in the same outer + // loop. Note that this leaves loopinfo temporarily out of sync with the + // CFG until the actual epilogue loop is inserted. + if (auto *ParentL = L->getParentLoop()) + if (LI->getLoopFor(LatchExit) != ParentL) { + LI->removeBlock(NewExit); + ParentL->addBasicBlockToLoop(NewExit, *LI); + LI->removeBlock(EpilogPreHeader); + ParentL->addBasicBlockToLoop(EpilogPreHeader, *LI); + } + } else { // If prolog remainder // Split the original preheader twice to insert prolog remainder loop Index: llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll +++ llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll @@ -4271,74 +4271,162 @@ ret i32 %sext3 } -; Nested loop and inner loop is unrolled -; FIXME: we cannot unroll with epilog remainder currently, because -; the outer loop does not contain the epilog preheader and epilog exit (while -; infact it should). This causes us to choke up on LCSSA form being incorrect in -; outer loop. However, the exit block where LCSSA fails, is infact still within -; the outer loop. For now, we just bail out in presence of outer loop and epilog -; loop is generated. -; The outer loop header is the preheader for the inner loop and the inner header -; branches back to the outer loop. +; Nested loop and inner loop is unrolled. The tricky bit here is that +; several blocks in the inner loop fall out of the outer block when we +; use the epilogue block remainder strategy. define void @test8() { ; EPILOG-LABEL: @test8( ; EPILOG-NEXT: bb: ; EPILOG-NEXT: br label %outerloop +; EPILOG: outerloop.loopexit.loopexit: +; EPILOG-NEXT: br label %outerloop.loopexit +; EPILOG: outerloop.loopexit.loopexit1: +; EPILOG-NEXT: br label %outerloop.loopexit ; EPILOG: outerloop.loopexit: ; EPILOG-NEXT: br label %outerloop ; EPILOG: outerloop: ; EPILOG-NEXT: %i = phi i64 [ 3, %bb ], [ 0, %outerloop.loopexit ] +; EPILOG-NEXT: %0 = sub i64 100, %i +; EPILOG-NEXT: %1 = sub i64 99, %i +; EPILOG-NEXT: %xtraiter = and i64 %0, 7 +; EPILOG-NEXT: %2 = icmp ult i64 %1, 7 +; EPILOG-NEXT: br i1 %2, label %exit.unr-lcssa, label %outerloop.new +; EPILOG: outerloop.new: +; EPILOG-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-NEXT: br label %innerH ; EPILOG: innerH: -; EPILOG-NEXT: %i3 = phi i64 [ %i4, %latch ], [ %i, %outerloop ] +; EPILOG-NEXT: %i3 = phi i64 [ %i, %outerloop.new ], [ %i4.7, %latch.7 ] +; EPILOG-NEXT: %niter = phi i64 [ %unroll_iter, %outerloop.new ], [ %niter.nsub.7, %latch.7 ] ; EPILOG-NEXT: %i4 = add nuw nsw i64 %i3, 1 -; EPILOG-NEXT: br i1 false, label %outerloop.loopexit, label %latch +; EPILOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch ; EPILOG: latch: -; EPILOG-NEXT: %i6 = icmp ult i64 %i4, 100 -; EPILOG-NEXT: br i1 %i6, label %innerH, label %exit +; EPILOG-NEXT: %niter.nsub = sub i64 %niter, 1 +; EPILOG-NEXT: %i4.1 = add nuw nsw i64 %i4, 1 +; EPILOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.1 +; EPILOG: exit.unr-lcssa.loopexit: +; EPILOG-NEXT: %i3.unr.ph = phi i64 [ %i4.7, %latch.7 ] +; EPILOG-NEXT: br label %exit.unr-lcssa +; EPILOG: exit.unr-lcssa: +; EPILOG-NEXT: %i3.unr = phi i64 [ %i, %outerloop ], [ %i3.unr.ph, %exit.unr-lcssa.loopexit ] +; EPILOG-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 +; EPILOG-NEXT: br i1 %lcmp.mod, label %innerH.epil.preheader, label %exit.loopexit +; EPILOG: innerH.epil.preheader: +; EPILOG-NEXT: br label %innerH.epil +; EPILOG: innerH.epil: +; EPILOG-NEXT: %i3.epil = phi i64 [ %i4.epil, %latch.epil ], [ %i3.unr, %innerH.epil.preheader ] +; EPILOG-NEXT: %epil.iter = phi i64 [ %xtraiter, %innerH.epil.preheader ], [ %epil.iter.sub, %latch.epil ] +; EPILOG-NEXT: %i4.epil = add nuw nsw i64 %i3.epil, 1 +; EPILOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit1, label %latch.epil +; EPILOG: latch.epil: +; EPILOG-NEXT: %i6.epil = icmp ult i64 %i4.epil, 100 +; EPILOG-NEXT: %epil.iter.sub = sub i64 %epil.iter, 1 +; EPILOG-NEXT: %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0 +; EPILOG-NEXT: br i1 %epil.iter.cmp, label %innerH.epil, label %exit.epilog-lcssa, !llvm.loop !11 +; EPILOG: exit.epilog-lcssa: +; EPILOG-NEXT: br label %exit +; EPILOG: exit.loopexit: +; EPILOG-NEXT: br label %exit ; EPILOG: exit: ; EPILOG-NEXT: ret void +; EPILOG: latch.1: +; EPILOG-NEXT: %niter.nsub.1 = sub i64 %niter.nsub, 1 +; EPILOG-NEXT: %i4.2 = add nuw nsw i64 %i4.1, 1 +; EPILOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.2 +; EPILOG: latch.2: +; EPILOG-NEXT: %niter.nsub.2 = sub i64 %niter.nsub.1, 1 +; EPILOG-NEXT: %i4.3 = add nuw nsw i64 %i4.2, 1 +; EPILOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.3 +; EPILOG: latch.3: +; EPILOG-NEXT: %niter.nsub.3 = sub i64 %niter.nsub.2, 1 +; EPILOG-NEXT: %i4.4 = add nuw nsw i64 %i4.3, 1 +; EPILOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.4 +; EPILOG: latch.4: +; EPILOG-NEXT: %niter.nsub.4 = sub i64 %niter.nsub.3, 1 +; EPILOG-NEXT: %i4.5 = add nuw nsw i64 %i4.4, 1 +; EPILOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.5 +; EPILOG: latch.5: +; EPILOG-NEXT: %niter.nsub.5 = sub i64 %niter.nsub.4, 1 +; EPILOG-NEXT: %i4.6 = add nuw nsw i64 %i4.5, 1 +; EPILOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.6 +; EPILOG: latch.6: +; EPILOG-NEXT: %niter.nsub.6 = sub i64 %niter.nsub.5, 1 +; EPILOG-NEXT: %i4.7 = add nuw nsw i64 %i4.6, 1 +; EPILOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.7 +; EPILOG: latch.7: +; EPILOG-NEXT: %niter.nsub.7 = sub i64 %niter.nsub.6, 1 +; EPILOG-NEXT: %niter.ncmp.7 = icmp ne i64 %niter.nsub.7, 0 +; EPILOG-NEXT: br i1 %niter.ncmp.7, label %innerH, label %exit.unr-lcssa.loopexit ; ; EPILOG-BLOCK-LABEL: @test8( ; EPILOG-BLOCK-NEXT: bb: ; EPILOG-BLOCK-NEXT: br label %outerloop +; EPILOG-BLOCK: outerloop.loopexit.loopexit: +; EPILOG-BLOCK-NEXT: br label %outerloop.loopexit ; EPILOG-BLOCK: outerloop.loopexit: -; EPILOG-BLOCK-NEXT: br label %innerH.12 +; EPILOG-BLOCK-NEXT: br i1 false, label %exit.unr-lcssa.1, label %outerloop.new.1 ; EPILOG-BLOCK: outerloop: ; EPILOG-BLOCK-NEXT: %i = phi i64 [ 3, %bb ], [ 0, %outerloop.loopexit.1 ] +; EPILOG-BLOCK-NEXT: %0 = sub i64 100, %i +; EPILOG-BLOCK-NEXT: %1 = sub i64 99, %i +; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 +; EPILOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 +; EPILOG-BLOCK-NEXT: br i1 %2, label %exit.unr-lcssa, label %outerloop.new +; EPILOG-BLOCK: outerloop.new: +; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-BLOCK-NEXT: br label %innerH ; EPILOG-BLOCK: innerH: -; EPILOG-BLOCK-NEXT: %i3 = phi i64 [ %i, %outerloop ], [ %i4.1, %latch.1 ] +; EPILOG-BLOCK-NEXT: %i3 = phi i64 [ %i, %outerloop.new ], [ %i4.1, %latch.1 ] +; EPILOG-BLOCK-NEXT: %niter = phi i64 [ %unroll_iter, %outerloop.new ], [ %niter.nsub.1, %latch.1 ] ; EPILOG-BLOCK-NEXT: %i4 = add nuw nsw i64 %i3, 1 -; EPILOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit, label %latch +; EPILOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch ; EPILOG-BLOCK: latch: -; EPILOG-BLOCK-NEXT: %i6 = icmp ult i64 %i4, 100 -; EPILOG-BLOCK-NEXT: br i1 %i6, label %innerH.1, label %exit.loopexit -; EPILOG-BLOCK: exit.loopexit: +; EPILOG-BLOCK-NEXT: %niter.nsub = sub i64 %niter, 1 +; EPILOG-BLOCK-NEXT: %i4.1 = add nuw nsw i64 %i4, 1 +; EPILOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.1 +; EPILOG-BLOCK: exit.unr-lcssa.loopexit: +; EPILOG-BLOCK-NEXT: br label %exit.unr-lcssa +; EPILOG-BLOCK: exit.unr-lcssa: +; EPILOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 +; EPILOG-BLOCK-NEXT: br i1 %lcmp.mod, label %innerH.epil.preheader, label %exit.loopexit +; EPILOG-BLOCK: innerH.epil.preheader: +; EPILOG-BLOCK-NEXT: br label %innerH.epil +; EPILOG-BLOCK: innerH.epil: +; EPILOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit, label %latch.epil +; EPILOG-BLOCK: latch.epil: ; EPILOG-BLOCK-NEXT: br label %exit -; EPILOG-BLOCK: exit.loopexit5: +; EPILOG-BLOCK: exit.loopexit: ; EPILOG-BLOCK-NEXT: br label %exit ; EPILOG-BLOCK: exit: ; EPILOG-BLOCK-NEXT: ret void -; EPILOG-BLOCK: innerH.1: -; EPILOG-BLOCK-NEXT: %i4.1 = add nuw nsw i64 %i4, 1 -; EPILOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit, label %latch.1 ; EPILOG-BLOCK: latch.1: -; EPILOG-BLOCK-NEXT: %i6.1 = icmp ult i64 %i4.1, 100 -; EPILOG-BLOCK-NEXT: br i1 %i6.1, label %innerH, label %exit.loopexit, !llvm.loop !11 -; EPILOG-BLOCK: innerH.12: -; EPILOG-BLOCK-NEXT: %i3.1 = phi i64 [ 0, %outerloop.loopexit ], [ %i4.1.1, %latch.1.1 ] +; EPILOG-BLOCK-NEXT: %niter.nsub.1 = sub i64 %niter.nsub, 1 +; EPILOG-BLOCK-NEXT: %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0 +; EPILOG-BLOCK-NEXT: br i1 %niter.ncmp.1, label %innerH, label %exit.unr-lcssa.loopexit, !llvm.loop !11 +; EPILOG-BLOCK: outerloop.new.1: +; EPILOG-BLOCK-NEXT: br label %innerH.1 +; EPILOG-BLOCK: innerH.1: +; EPILOG-BLOCK-NEXT: %i3.1 = phi i64 [ 0, %outerloop.new.1 ], [ %i4.1.1, %latch.1.1 ] +; EPILOG-BLOCK-NEXT: %niter.1 = phi i64 [ 100, %outerloop.new.1 ], [ %niter.nsub.1.1, %latch.1.1 ] ; EPILOG-BLOCK-NEXT: %i4.11 = add nuw nsw i64 %i3.1, 1 -; EPILOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.1, label %latch.14 -; EPILOG-BLOCK: latch.14: -; EPILOG-BLOCK-NEXT: %i6.13 = icmp ult i64 %i4.11, 100 -; EPILOG-BLOCK-NEXT: br i1 %i6.13, label %innerH.1.1, label %exit.loopexit5 -; EPILOG-BLOCK: innerH.1.1: +; EPILOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.13 +; EPILOG-BLOCK: latch.13: +; EPILOG-BLOCK-NEXT: %niter.nsub.12 = sub i64 %niter.1, 1 ; EPILOG-BLOCK-NEXT: %i4.1.1 = add nuw nsw i64 %i4.11, 1 -; EPILOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.1, label %latch.1.1 +; EPILOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.1.1 ; EPILOG-BLOCK: latch.1.1: -; EPILOG-BLOCK-NEXT: %i6.1.1 = icmp ult i64 %i4.1.1, 100 -; EPILOG-BLOCK-NEXT: br i1 %i6.1.1, label %innerH.12, label %exit.loopexit5, !llvm.loop !11 +; EPILOG-BLOCK-NEXT: %niter.nsub.1.1 = sub i64 %niter.nsub.12, 1 +; EPILOG-BLOCK-NEXT: %niter.ncmp.1.1 = icmp ne i64 %niter.nsub.1.1, 0 +; EPILOG-BLOCK-NEXT: br i1 %niter.ncmp.1.1, label %innerH.1, label %exit.unr-lcssa.loopexit.1, !llvm.loop !11 +; EPILOG-BLOCK: exit.unr-lcssa.loopexit.1: +; EPILOG-BLOCK-NEXT: br label %exit.unr-lcssa.1 +; EPILOG-BLOCK: outerloop.loopexit.loopexit.1: +; EPILOG-BLOCK-NEXT: br label %outerloop.loopexit.1 +; EPILOG-BLOCK: exit.unr-lcssa.1: +; EPILOG-BLOCK-NEXT: br i1 false, label %innerH.epil.preheader.1, label %exit.loopexit +; EPILOG-BLOCK: innerH.epil.preheader.1: +; EPILOG-BLOCK-NEXT: br label %innerH.epil.1 +; EPILOG-BLOCK: innerH.epil.1: +; EPILOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.1, label %latch.epil ; EPILOG-BLOCK: outerloop.loopexit.1: ; EPILOG-BLOCK-NEXT: br label %outerloop, !llvm.loop !12 ; @@ -4515,6 +4603,8 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; EPILOG-LABEL: @test9( ; EPILOG-NEXT: bb: +; EPILOG-NEXT: %0 = add i32 %n, -1 +; EPILOG-NEXT: %1 = add i32 %n, -2 ; EPILOG-NEXT: br label %outerloopHdr ; EPILOG: outerloopHdr: ; EPILOG-NEXT: %trip = add i32 %n, -1 @@ -4522,86 +4612,180 @@ ; EPILOG-NEXT: br i1 %outercnd, label %preheader, label %outerLatch ; EPILOG: preheader: ; EPILOG-NEXT: %i4 = zext i32 0 to i64 +; EPILOG-NEXT: %xtraiter = and i32 %0, 7 +; EPILOG-NEXT: %2 = icmp ult i32 %1, 7 +; EPILOG-NEXT: br i1 %2, label %outerLatch.loopexit.unr-lcssa, label %preheader.new +; EPILOG: preheader.new: +; EPILOG-NEXT: %unroll_iter = sub i32 %0, %xtraiter ; EPILOG-NEXT: br label %header ; EPILOG: header: -; EPILOG-NEXT: %phi = phi i64 [ %i4, %preheader ], [ %iv.next, %latch ] -; EPILOG-NEXT: %i7 = trunc i64 %phi to i32 -; EPILOG-NEXT: br i1 true, label %latch, label %innerexit +; EPILOG-NEXT: %phi = phi i64 [ %i4, %preheader.new ], [ %iv.next.7, %latch.7 ] +; EPILOG-NEXT: %niter = phi i32 [ %unroll_iter, %preheader.new ], [ %niter.nsub.7, %latch.7 ] +; EPILOG-NEXT: br i1 true, label %latch, label %innerexit.loopexit +; EPILOG: innerexit.loopexit: +; EPILOG-NEXT: %trip.lcssa.ph = phi i32 [ %trip, %header ], [ %trip, %latch ], [ %trip, %latch.1 ], [ %trip, %latch.2 ], [ %trip, %latch.3 ], [ %trip, %latch.4 ], [ %trip, %latch.5 ], [ %trip, %latch.6 ] +; EPILOG-NEXT: br label %innerexit +; EPILOG: innerexit.loopexit1: +; EPILOG-NEXT: %trip.lcssa.ph2 = phi i32 [ %trip, %header.epil ] +; EPILOG-NEXT: br label %innerexit ; EPILOG: innerexit: -; EPILOG-NEXT: %trip.lcssa = phi i32 [ %trip, %header ] +; EPILOG-NEXT: %trip.lcssa = phi i32 [ %trip.lcssa.ph, %innerexit.loopexit ], [ %trip.lcssa.ph2, %innerexit.loopexit1 ] ; EPILOG-NEXT: %i9 = call i8 addrspace(1)* @foo(i32 %trip.lcssa) ; EPILOG-NEXT: ret i8 addrspace(1)* %i9 ; EPILOG: latch: -; EPILOG-NEXT: %i11 = add nsw i32 %i7, 1 -; EPILOG-NEXT: %innercnd = icmp slt i32 %i11, %trip ; EPILOG-NEXT: %iv.next = add nuw nsw i64 %phi, 1 -; EPILOG-NEXT: br i1 %innercnd, label %header, label %outerLatch.loopexit +; EPILOG-NEXT: %niter.nsub = sub i32 %niter, 1 +; EPILOG-NEXT: br i1 true, label %latch.1, label %innerexit.loopexit +; EPILOG: outerLatch.loopexit.unr-lcssa.loopexit: +; EPILOG-NEXT: %phi.unr.ph = phi i64 [ %iv.next.7, %latch.7 ] +; EPILOG-NEXT: br label %outerLatch.loopexit.unr-lcssa +; EPILOG: outerLatch.loopexit.unr-lcssa: +; EPILOG-NEXT: %phi.unr = phi i64 [ %i4, %preheader ], [ %phi.unr.ph, %outerLatch.loopexit.unr-lcssa.loopexit ] +; EPILOG-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0 +; EPILOG-NEXT: br i1 %lcmp.mod, label %header.epil.preheader, label %outerLatch.loopexit +; EPILOG: header.epil.preheader: +; EPILOG-NEXT: br label %header.epil +; EPILOG: header.epil: +; EPILOG-NEXT: %phi.epil = phi i64 [ %phi.unr, %header.epil.preheader ], [ %iv.next.epil, %latch.epil ] +; EPILOG-NEXT: %epil.iter = phi i32 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %latch.epil ] +; EPILOG-NEXT: %i7.epil = trunc i64 %phi.epil to i32 +; EPILOG-NEXT: br i1 true, label %latch.epil, label %innerexit.loopexit1 +; EPILOG: latch.epil: +; EPILOG-NEXT: %i11.epil = add nsw i32 %i7.epil, 1 +; EPILOG-NEXT: %innercnd.epil = icmp slt i32 %i11.epil, %trip +; EPILOG-NEXT: %iv.next.epil = add nuw nsw i64 %phi.epil, 1 +; EPILOG-NEXT: %epil.iter.sub = sub i32 %epil.iter, 1 +; EPILOG-NEXT: %epil.iter.cmp = icmp ne i32 %epil.iter.sub, 0 +; EPILOG-NEXT: br i1 %epil.iter.cmp, label %header.epil, label %outerLatch.loopexit.epilog-lcssa, !llvm.loop !12 +; EPILOG: outerLatch.loopexit.epilog-lcssa: +; EPILOG-NEXT: br label %outerLatch.loopexit ; EPILOG: outerLatch.loopexit: ; EPILOG-NEXT: br label %outerLatch ; EPILOG: outerLatch: ; EPILOG-NEXT: br label %outerloopHdr +; EPILOG: latch.1: +; EPILOG-NEXT: %iv.next.1 = add nuw nsw i64 %iv.next, 1 +; EPILOG-NEXT: %niter.nsub.1 = sub i32 %niter.nsub, 1 +; EPILOG-NEXT: br i1 true, label %latch.2, label %innerexit.loopexit +; EPILOG: latch.2: +; EPILOG-NEXT: %iv.next.2 = add nuw nsw i64 %iv.next.1, 1 +; EPILOG-NEXT: %niter.nsub.2 = sub i32 %niter.nsub.1, 1 +; EPILOG-NEXT: br i1 true, label %latch.3, label %innerexit.loopexit +; EPILOG: latch.3: +; EPILOG-NEXT: %iv.next.3 = add nuw nsw i64 %iv.next.2, 1 +; EPILOG-NEXT: %niter.nsub.3 = sub i32 %niter.nsub.2, 1 +; EPILOG-NEXT: br i1 true, label %latch.4, label %innerexit.loopexit +; EPILOG: latch.4: +; EPILOG-NEXT: %iv.next.4 = add nuw nsw i64 %iv.next.3, 1 +; EPILOG-NEXT: %niter.nsub.4 = sub i32 %niter.nsub.3, 1 +; EPILOG-NEXT: br i1 true, label %latch.5, label %innerexit.loopexit +; EPILOG: latch.5: +; EPILOG-NEXT: %iv.next.5 = add nuw nsw i64 %iv.next.4, 1 +; EPILOG-NEXT: %niter.nsub.5 = sub i32 %niter.nsub.4, 1 +; EPILOG-NEXT: br i1 true, label %latch.6, label %innerexit.loopexit +; EPILOG: latch.6: +; EPILOG-NEXT: %iv.next.6 = add nuw nsw i64 %iv.next.5, 1 +; EPILOG-NEXT: %niter.nsub.6 = sub i32 %niter.nsub.5, 1 +; EPILOG-NEXT: br i1 true, label %latch.7, label %innerexit.loopexit +; EPILOG: latch.7: +; EPILOG-NEXT: %iv.next.7 = add nuw nsw i64 %iv.next.6, 1 +; EPILOG-NEXT: %niter.nsub.7 = sub i32 %niter.nsub.6, 1 +; EPILOG-NEXT: %niter.ncmp.7 = icmp ne i32 %niter.nsub.7, 0 +; EPILOG-NEXT: br i1 %niter.ncmp.7, label %header, label %outerLatch.loopexit.unr-lcssa.loopexit ; ; EPILOG-BLOCK-LABEL: @test9( ; EPILOG-BLOCK-NEXT: bb: +; EPILOG-BLOCK-NEXT: %0 = add i32 %n, -1 +; EPILOG-BLOCK-NEXT: %1 = add i32 %n, -2 ; EPILOG-BLOCK-NEXT: br label %outerloopHdr ; EPILOG-BLOCK: outerloopHdr: ; EPILOG-BLOCK-NEXT: %trip = add i32 %n, -1 ; EPILOG-BLOCK-NEXT: %outercnd = icmp slt i32 0, %trip ; EPILOG-BLOCK-NEXT: br i1 %outercnd, label %preheader, label %outerLatch ; EPILOG-BLOCK: preheader: +; EPILOG-BLOCK-NEXT: %xtraiter = and i32 %0, 1 +; EPILOG-BLOCK-NEXT: %2 = icmp ult i32 %1, 1 +; EPILOG-BLOCK-NEXT: br i1 %2, label %outerLatch.loopexit.unr-lcssa, label %preheader.new +; EPILOG-BLOCK: preheader.new: +; EPILOG-BLOCK-NEXT: %unroll_iter = sub i32 %0, %xtraiter ; EPILOG-BLOCK-NEXT: br label %header ; EPILOG-BLOCK: header: -; EPILOG-BLOCK-NEXT: %phi = phi i64 [ 0, %preheader ], [ %iv.next.1, %latch.1 ] -; EPILOG-BLOCK-NEXT: %i7 = trunc i64 %phi to i32 -; EPILOG-BLOCK-NEXT: br i1 true, label %latch, label %innerexit.loopexit +; EPILOG-BLOCK-NEXT: %phi = phi i64 [ 0, %preheader.new ], [ %iv.next.1, %latch.1 ] +; EPILOG-BLOCK-NEXT: %niter = phi i32 [ %unroll_iter, %preheader.new ], [ %niter.nsub.1, %latch.1 ] +; EPILOG-BLOCK-NEXT: br i1 true, label %latch, label %innerexit.loopexit.loopexit +; EPILOG-BLOCK: innerexit.loopexit.loopexit: +; EPILOG-BLOCK-NEXT: %trip.lcssa.ph.ph = phi i32 [ %trip, %latch ], [ %trip, %header ] +; EPILOG-BLOCK-NEXT: br label %innerexit.loopexit +; EPILOG-BLOCK: innerexit.loopexit.loopexit6: +; EPILOG-BLOCK-NEXT: %trip.lcssa.ph.ph7 = phi i32 [ %trip.1, %latch.15 ], [ %trip.1, %header.1 ] +; EPILOG-BLOCK-NEXT: br label %innerexit.loopexit ; EPILOG-BLOCK: innerexit.loopexit: -; EPILOG-BLOCK-NEXT: %trip.lcssa.ph = phi i32 [ %trip, %header.1 ], [ %trip, %header ] +; EPILOG-BLOCK-NEXT: %trip.lcssa.ph = phi i32 [ %trip.lcssa.ph.ph, %innerexit.loopexit.loopexit ], [ %trip.lcssa.ph.ph7, %innerexit.loopexit.loopexit6 ] ; EPILOG-BLOCK-NEXT: br label %innerexit -; EPILOG-BLOCK: innerexit.loopexit7: -; EPILOG-BLOCK-NEXT: %trip.lcssa.ph8 = phi i32 [ %trip.1, %header.1.1 ], [ %trip.1, %header.12 ] +; EPILOG-BLOCK: innerexit.loopexit1: +; EPILOG-BLOCK-NEXT: %trip.lcssa.ph2 = phi i32 [ %trip, %header.epil ], [ %trip.1, %header.epil.1 ] ; EPILOG-BLOCK-NEXT: br label %innerexit ; EPILOG-BLOCK: innerexit: -; EPILOG-BLOCK-NEXT: %trip.lcssa = phi i32 [ %trip.lcssa.ph, %innerexit.loopexit ], [ %trip.lcssa.ph8, %innerexit.loopexit7 ] +; EPILOG-BLOCK-NEXT: %trip.lcssa = phi i32 [ %trip.lcssa.ph, %innerexit.loopexit ], [ %trip.lcssa.ph2, %innerexit.loopexit1 ] ; EPILOG-BLOCK-NEXT: %i9 = call i8 addrspace(1)* @foo(i32 %trip.lcssa) ; EPILOG-BLOCK-NEXT: ret i8 addrspace(1)* %i9 ; EPILOG-BLOCK: latch: -; EPILOG-BLOCK-NEXT: %i11 = add nuw nsw i32 %i7, 1 -; EPILOG-BLOCK-NEXT: %innercnd = icmp slt i32 %i11, %trip ; EPILOG-BLOCK-NEXT: %iv.next = add nuw nsw i64 %phi, 1 -; EPILOG-BLOCK-NEXT: br i1 %innercnd, label %header.1, label %outerLatch.loopexit +; EPILOG-BLOCK-NEXT: %niter.nsub = sub i32 %niter, 1 +; EPILOG-BLOCK-NEXT: br i1 true, label %latch.1, label %innerexit.loopexit.loopexit +; EPILOG-BLOCK: outerLatch.loopexit.unr-lcssa.loopexit: +; EPILOG-BLOCK-NEXT: br label %outerLatch.loopexit.unr-lcssa +; EPILOG-BLOCK: outerLatch.loopexit.unr-lcssa: +; EPILOG-BLOCK-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0 +; EPILOG-BLOCK-NEXT: br i1 %lcmp.mod, label %header.epil.preheader, label %outerLatch.loopexit +; EPILOG-BLOCK: header.epil.preheader: +; EPILOG-BLOCK-NEXT: br label %header.epil +; EPILOG-BLOCK: header.epil: +; EPILOG-BLOCK-NEXT: br i1 true, label %latch.epil, label %innerexit.loopexit1 +; EPILOG-BLOCK: latch.epil: +; EPILOG-BLOCK-NEXT: br label %outerLatch.loopexit ; EPILOG-BLOCK: outerLatch.loopexit: ; EPILOG-BLOCK-NEXT: br label %outerLatch ; EPILOG-BLOCK: outerLatch: ; EPILOG-BLOCK-NEXT: %trip.1 = add i32 %n, -1 ; EPILOG-BLOCK-NEXT: %outercnd.1 = icmp slt i32 0, %trip.1 ; EPILOG-BLOCK-NEXT: br i1 %outercnd.1, label %preheader.1, label %outerLatch.1 -; EPILOG-BLOCK: header.1: -; EPILOG-BLOCK-NEXT: %i7.1 = trunc i64 %iv.next to i32 -; EPILOG-BLOCK-NEXT: br i1 true, label %latch.1, label %innerexit.loopexit ; EPILOG-BLOCK: latch.1: -; EPILOG-BLOCK-NEXT: %i11.1 = add nsw i32 %i7.1, 1 -; EPILOG-BLOCK-NEXT: %innercnd.1 = icmp slt i32 %i11.1, %trip ; EPILOG-BLOCK-NEXT: %iv.next.1 = add nuw nsw i64 %iv.next, 1 -; EPILOG-BLOCK-NEXT: br i1 %innercnd.1, label %header, label %outerLatch.loopexit, !llvm.loop !13 +; EPILOG-BLOCK-NEXT: %niter.nsub.1 = sub i32 %niter.nsub, 1 +; EPILOG-BLOCK-NEXT: %niter.ncmp.1 = icmp ne i32 %niter.nsub.1, 0 +; EPILOG-BLOCK-NEXT: br i1 %niter.ncmp.1, label %header, label %outerLatch.loopexit.unr-lcssa.loopexit, !llvm.loop !13 ; EPILOG-BLOCK: preheader.1: -; EPILOG-BLOCK-NEXT: br label %header.12 -; EPILOG-BLOCK: header.12: -; EPILOG-BLOCK-NEXT: %phi.1 = phi i64 [ 0, %preheader.1 ], [ %iv.next.1.1, %latch.1.1 ] -; EPILOG-BLOCK-NEXT: %i7.11 = trunc i64 %phi.1 to i32 -; EPILOG-BLOCK-NEXT: br i1 true, label %latch.16, label %innerexit.loopexit7 -; EPILOG-BLOCK: latch.16: -; EPILOG-BLOCK-NEXT: %i11.13 = add nuw nsw i32 %i7.11, 1 -; EPILOG-BLOCK-NEXT: %innercnd.14 = icmp slt i32 %i11.13, %trip.1 -; EPILOG-BLOCK-NEXT: %iv.next.15 = add nuw nsw i64 %phi.1, 1 -; EPILOG-BLOCK-NEXT: br i1 %innercnd.14, label %header.1.1, label %outerLatch.loopexit.1 -; EPILOG-BLOCK: header.1.1: -; EPILOG-BLOCK-NEXT: %i7.1.1 = trunc i64 %iv.next.15 to i32 -; EPILOG-BLOCK-NEXT: br i1 true, label %latch.1.1, label %innerexit.loopexit7 +; EPILOG-BLOCK-NEXT: %xtraiter.1 = and i32 %0, 1 +; EPILOG-BLOCK-NEXT: %3 = icmp ult i32 %1, 1 +; EPILOG-BLOCK-NEXT: br i1 %3, label %outerLatch.loopexit.unr-lcssa.1, label %preheader.new.1 +; EPILOG-BLOCK: preheader.new.1: +; EPILOG-BLOCK-NEXT: %unroll_iter.1 = sub i32 %0, %xtraiter.1 +; EPILOG-BLOCK-NEXT: br label %header.1 +; EPILOG-BLOCK: header.1: +; EPILOG-BLOCK-NEXT: %phi.1 = phi i64 [ 0, %preheader.new.1 ], [ %iv.next.1.1, %latch.1.1 ] +; EPILOG-BLOCK-NEXT: %niter.1 = phi i32 [ %unroll_iter.1, %preheader.new.1 ], [ %niter.nsub.1.1, %latch.1.1 ] +; EPILOG-BLOCK-NEXT: br i1 true, label %latch.15, label %innerexit.loopexit.loopexit6 +; EPILOG-BLOCK: latch.15: +; EPILOG-BLOCK-NEXT: %iv.next.13 = add nuw nsw i64 %phi.1, 1 +; EPILOG-BLOCK-NEXT: %niter.nsub.14 = sub i32 %niter.1, 1 +; EPILOG-BLOCK-NEXT: br i1 true, label %latch.1.1, label %innerexit.loopexit.loopexit6 ; EPILOG-BLOCK: latch.1.1: -; EPILOG-BLOCK-NEXT: %i11.1.1 = add nsw i32 %i7.1.1, 1 -; EPILOG-BLOCK-NEXT: %innercnd.1.1 = icmp slt i32 %i11.1.1, %trip.1 -; EPILOG-BLOCK-NEXT: %iv.next.1.1 = add nuw nsw i64 %iv.next.15, 1 -; EPILOG-BLOCK-NEXT: br i1 %innercnd.1.1, label %header.12, label %outerLatch.loopexit.1, !llvm.loop !13 +; EPILOG-BLOCK-NEXT: %iv.next.1.1 = add nuw nsw i64 %iv.next.13, 1 +; EPILOG-BLOCK-NEXT: %niter.nsub.1.1 = sub i32 %niter.nsub.14, 1 +; EPILOG-BLOCK-NEXT: %niter.ncmp.1.1 = icmp ne i32 %niter.nsub.1.1, 0 +; EPILOG-BLOCK-NEXT: br i1 %niter.ncmp.1.1, label %header.1, label %outerLatch.loopexit.unr-lcssa.loopexit.1, !llvm.loop !13 +; EPILOG-BLOCK: outerLatch.loopexit.unr-lcssa.loopexit.1: +; EPILOG-BLOCK-NEXT: br label %outerLatch.loopexit.unr-lcssa.1 +; EPILOG-BLOCK: outerLatch.loopexit.unr-lcssa.1: +; EPILOG-BLOCK-NEXT: %lcmp.mod.1 = icmp ne i32 %xtraiter.1, 0 +; EPILOG-BLOCK-NEXT: br i1 %lcmp.mod.1, label %header.epil.preheader.1, label %outerLatch.loopexit.1 +; EPILOG-BLOCK: header.epil.preheader.1: +; EPILOG-BLOCK-NEXT: br label %header.epil.1 +; EPILOG-BLOCK: header.epil.1: +; EPILOG-BLOCK-NEXT: br i1 true, label %latch.epil.1, label %innerexit.loopexit1 +; EPILOG-BLOCK: latch.epil.1: +; EPILOG-BLOCK-NEXT: br label %outerLatch.loopexit.1 ; EPILOG-BLOCK: outerLatch.loopexit.1: ; EPILOG-BLOCK-NEXT: br label %outerLatch.1 ; EPILOG-BLOCK: outerLatch.1: