Index: llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Verifier.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -623,19 +624,40 @@ if (!SE) return false; - // Only unroll loops with a computable trip count, and the trip count needs - // to be an int value (allowing a pointer type is a TODO item). - // We calculate the backedge count by using getExitCount on the Latch block, - // which is proven to be the only exiting block in this loop. This is same as - // calculating getBackedgeTakenCount on the loop (which computes SCEV for all - // exiting blocks). - const SCEV *BECountSC = SE->getExitCount(L, Latch); - if (isa(BECountSC) || - !BECountSC->getType()->isIntegerTy()) { + // When generating the iteration counts for the loops, we use the symbolic + // max exit count as this bounds all (computable) exits and thus allows us + // to discharge those exits in the main loop. If this turns out to require + // expensive expansions that we are not allowed to emit, we fall back to + // unrolling based on the latch count only, without pruning other exits. + + // We need to know that the latch is one of the computable exits since + // we rewrite the latch exit test. 
+ auto LatchEC = SE->getExitCount(L, Latch); + if (isa(LatchEC) || + !LatchEC->getType()->isIntegerTy()) { LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n"); return false; } + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + SmallVector ExitCounts; + SmallVector ExitingBlocksToPrune; + for (auto *ExitingBB : ExitingBlocks) { + if (!DT || !DT->dominates(ExitingBB, Latch)) + continue; + auto EC = SE->getExitCount(L, ExitingBB); + if (isa(EC) || !EC->getType()->isIntegerTy()) + continue; + ExitCounts.push_back(EC); + ExitingBlocksToPrune.push_back(ExitingBB); + } + // We haven't implemented multiple exit branch folding for the prolog + // case yet. Since we as of yet get no simplification benefit, keep + // using only the latch exit count. + const SCEV *BECountSC = UseEpilogRemainder ? + SE->getUMinFromMismatchedTypes(ExitCounts) : LatchEC; + unsigned BEWidth = cast(BECountSC->getType())->getBitWidth(); // Add 1 since the backedge count doesn't include the first loop iteration. @@ -654,8 +676,16 @@ if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L, SCEVCheapExpansionBudget, TTI, PreHeaderBR)) { - LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n"); - return false; + // Give up on eliminating multiple exits, and just eliminate the latch + // TODO: reconsider this heuristic entirely. 
+ BECountSC = LatchEC; + TripCountSC = SE->getAddExpr(BECountSC, SE->getOne(BECountSC->getType())); + ExitingBlocksToPrune.clear(); + if (Expander.isHighCostExpansion(TripCountSC, L, SCEVCheapExpansionBudget, + TTI, PreHeaderBR)) { + LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n"); + return false; + } } // This constraint lets us deal with an overflowing trip count easily; see the @@ -908,6 +938,37 @@ NewIdx->addIncoming(Zero, NewPreHeader); NewIdx->addIncoming(IdxNext, Latch); LatchBR->setCondition(IdxCmp); + + // Using the information we saved above, rewrite non-latch exits in the + // main loop which we know can't be taken to a form that the main unroller + // logic will constant fold. We shouldn't need this in theory, but in + // practice, SCEV is not strong enough to prune these exits. + Value *IdxUnrolled = nullptr; // lazily computed + auto *CountC = ConstantInt::get(TripCount->getType(), Count); + auto *CountMinusOneC = ConstantInt::get(TripCount->getType(), Count-1); + for (auto *ExitingBB : ExitingBlocksToPrune) { + if (Latch == ExitingBB) + continue; + auto *BR = dyn_cast(ExitingBB->getTerminator()); + if (!BR) continue; + + if (!IdxUnrolled) { + B2.SetInsertPoint(&*std::next(NewIdx->getIterator())); + IdxUnrolled = B2.CreateURem(NewIdx, CountC); + } + + B2.SetInsertPoint(BR); + auto *Cond = BR->getCondition(); + if (LatchBR->getSuccessor(0) == Header) { + auto *IdxCmp = B2.CreateICmpNE(IdxUnrolled, CountMinusOneC); + auto *NewCond = B2.CreateOr(IdxCmp, Cond); + BR->setCondition(NewCond); + } else { + auto *IdxCmp = B2.CreateICmpEQ(IdxUnrolled, CountMinusOneC); + auto *NewCond = B2.CreateAnd(IdxCmp, Cond); + BR->setCondition(NewCond); + } + } } else { // Connect the prolog code to the original loop and update the // PHI functions. 
Index: llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll +++ llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll @@ -3159,21 +3159,24 @@ ; ; EPILOG-BLOCK-LABEL: @unique_exit( ; EPILOG-BLOCK-NEXT: preheader: -; EPILOG-BLOCK-NEXT: %0 = shl i32 %M, 3 -; EPILOG-BLOCK-NEXT: %umax = call i32 @llvm.umax.i32(i32 %0, i32 1) -; EPILOG-BLOCK-NEXT: %1 = add i32 %umax, -1 -; EPILOG-BLOCK-NEXT: %xtraiter = and i32 %umax, 1 -; EPILOG-BLOCK-NEXT: %2 = icmp ult i32 %1, 1 -; EPILOG-BLOCK-NEXT: br i1 %2, label %latchExit.unr-lcssa, label %preheader.new +; EPILOG-BLOCK-NEXT: %umax = call i32 @llvm.umax.i32(i32 %N, i32 1) +; EPILOG-BLOCK-NEXT: %0 = add i32 %umax, -1 +; EPILOG-BLOCK-NEXT: %1 = shl i32 %M, 3 +; EPILOG-BLOCK-NEXT: %umax1 = call i32 @llvm.umax.i32(i32 %1, i32 1) +; EPILOG-BLOCK-NEXT: %2 = add i32 %umax1, -1 +; EPILOG-BLOCK-NEXT: %umin = call i32 @llvm.umin.i32(i32 %0, i32 %2) +; EPILOG-BLOCK-NEXT: %3 = add nuw i32 %umin, 1 +; EPILOG-BLOCK-NEXT: %xtraiter = and i32 %3, 1 +; EPILOG-BLOCK-NEXT: %4 = icmp ult i32 %umin, 1 +; EPILOG-BLOCK-NEXT: br i1 %4, label %latchExit.unr-lcssa, label %preheader.new ; EPILOG-BLOCK: preheader.new: -; EPILOG-BLOCK-NEXT: %unroll_iter = sub i32 %umax, %xtraiter +; EPILOG-BLOCK-NEXT: %unroll_iter = sub i32 %3, %xtraiter ; EPILOG-BLOCK-NEXT: br label %header ; EPILOG-BLOCK: header: ; EPILOG-BLOCK-NEXT: %i4 = phi i32 [ 0, %preheader.new ], [ %inc.1, %latch.1 ] ; EPILOG-BLOCK-NEXT: %niter = phi i32 [ 0, %preheader.new ], [ %niter.next.1, %latch.1 ] ; EPILOG-BLOCK-NEXT: %inc = add nuw nsw i32 %i4, 1 -; EPILOG-BLOCK-NEXT: %cmp1 = icmp ult i32 %inc, %N -; EPILOG-BLOCK-NEXT: br i1 %cmp1, label %latch, label %latchExit.epilog-lcssa.loopexit +; EPILOG-BLOCK-NEXT: br i1 true, label %latch, label %latchExit.epilog-lcssa.loopexit ; EPILOG-BLOCK: latch: ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i32 
%niter, 1 ; EPILOG-BLOCK-NEXT: %inc.1 = add nuw i32 %inc, 1 @@ -3201,13 +3204,13 @@ ; EPILOG-BLOCK: latch.epil: ; EPILOG-BLOCK-NEXT: br label %latchExit.epilog-lcssa ; EPILOG-BLOCK: latchExit.epilog-lcssa.loopexit: -; EPILOG-BLOCK-NEXT: %i2.ph.ph1.ph = phi i32 [ %i4, %header ], [ %inc, %latch ] +; EPILOG-BLOCK-NEXT: %i2.ph.ph2.ph = phi i32 [ %i4, %header ], [ %inc, %latch ] ; EPILOG-BLOCK-NEXT: br label %latchExit.epilog-lcssa ; EPILOG-BLOCK: latchExit.epilog-lcssa: -; EPILOG-BLOCK-NEXT: %i2.ph.ph1 = phi i32 [ -1, %latch.epil ], [ %i4.unr, %header.epil ], [ %i2.ph.ph1.ph, %latchExit.epilog-lcssa.loopexit ] +; EPILOG-BLOCK-NEXT: %i2.ph.ph2 = phi i32 [ -1, %latch.epil ], [ %i4.unr, %header.epil ], [ %i2.ph.ph2.ph, %latchExit.epilog-lcssa.loopexit ] ; EPILOG-BLOCK-NEXT: br label %latchExit ; EPILOG-BLOCK: latchExit: -; EPILOG-BLOCK-NEXT: %i2.ph = phi i32 [ %i2.ph.ph, %latchExit.unr-lcssa ], [ %i2.ph.ph1, %latchExit.epilog-lcssa ] +; EPILOG-BLOCK-NEXT: %i2.ph = phi i32 [ %i2.ph.ph, %latchExit.unr-lcssa ], [ %i2.ph.ph2, %latchExit.epilog-lcssa ] ; EPILOG-BLOCK-NEXT: ret void ; ; PROLOG-LABEL: @unique_exit( @@ -5306,54 +5309,49 @@ ; EPILOG-LABEL: @test10( ; EPILOG-NEXT: entry: ; EPILOG-NEXT: %0 = add i64 %trip, -1 -; EPILOG-NEXT: %xtraiter = and i64 %trip, 7 -; EPILOG-NEXT: %1 = icmp ult i64 %0, 7 -; EPILOG-NEXT: br i1 %1, label %exit2.unr-lcssa, label %entry.new +; EPILOG-NEXT: %umin = call i64 @llvm.umin.i64(i64 %trip2, i64 %0) +; EPILOG-NEXT: %1 = add i64 %umin, 1 +; EPILOG-NEXT: %xtraiter = and i64 %1, 7 +; EPILOG-NEXT: %2 = icmp ult i64 %umin, 7 +; EPILOG-NEXT: br i1 %2, label %exit2.unr-lcssa, label %entry.new ; EPILOG: entry.new: -; EPILOG-NEXT: %unroll_iter = sub i64 %trip, %xtraiter +; EPILOG-NEXT: %unroll_iter = sub i64 %1, %xtraiter ; EPILOG-NEXT: br label %loop_header ; EPILOG: loop_header: ; EPILOG-NEXT: %iv = phi i64 [ 0, %entry.new ], [ %iv_next.7, %loop_latch.7 ] ; EPILOG-NEXT: %niter = phi i64 [ 0, %entry.new ], [ %niter.next.7, %loop_latch.7 ] ; 
EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: %cmp_early = icmp ne i64 %iv, %trip2 -; EPILOG-NEXT: br i1 %cmp_early, label %loop_latch, label %exit1.loopexit +; EPILOG-NEXT: br i1 true, label %loop_latch, label %exit1.loopexit ; EPILOG: loop_latch: ; EPILOG-NEXT: %iv_next = add nuw nsw i64 %iv, 1 ; EPILOG-NEXT: %niter.next = add nuw nsw i64 %niter, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: %cmp_early.1 = icmp ne i64 %iv_next, %trip2 -; EPILOG-NEXT: br i1 %cmp_early.1, label %loop_latch.1, label %exit1.loopexit +; EPILOG-NEXT: br i1 true, label %loop_latch.1, label %exit1.loopexit ; EPILOG: loop_latch.1: ; EPILOG-NEXT: %iv_next.1 = add nuw nsw i64 %iv_next, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: %cmp_early.2 = icmp ne i64 %iv_next.1, %trip2 -; EPILOG-NEXT: br i1 %cmp_early.2, label %loop_latch.2, label %exit1.loopexit +; EPILOG-NEXT: br i1 true, label %loop_latch.2, label %exit1.loopexit ; EPILOG: loop_latch.2: ; EPILOG-NEXT: %iv_next.2 = add nuw nsw i64 %iv_next.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: %cmp_early.3 = icmp ne i64 %iv_next.2, %trip2 -; EPILOG-NEXT: br i1 %cmp_early.3, label %loop_latch.3, label %exit1.loopexit +; EPILOG-NEXT: br i1 true, label %loop_latch.3, label %exit1.loopexit ; EPILOG: loop_latch.3: ; EPILOG-NEXT: %iv_next.3 = add nuw nsw i64 %iv_next.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: %cmp_early.4 = icmp ne i64 %iv_next.3, %trip2 -; EPILOG-NEXT: br i1 %cmp_early.4, label %loop_latch.4, label %exit1.loopexit +; EPILOG-NEXT: br i1 true, label %loop_latch.4, label %exit1.loopexit ; EPILOG: loop_latch.4: ; EPILOG-NEXT: %iv_next.4 = add nuw nsw i64 %iv_next.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: %cmp_early.5 = icmp ne i64 %iv_next.4, 
%trip2 -; EPILOG-NEXT: br i1 %cmp_early.5, label %loop_latch.5, label %exit1.loopexit +; EPILOG-NEXT: br i1 true, label %loop_latch.5, label %exit1.loopexit ; EPILOG: loop_latch.5: ; EPILOG-NEXT: %iv_next.5 = add nuw nsw i64 %iv_next.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: %cmp_early.6 = icmp ne i64 %iv_next.5, %trip2 -; EPILOG-NEXT: br i1 %cmp_early.6, label %loop_latch.6, label %exit1.loopexit +; EPILOG-NEXT: br i1 true, label %loop_latch.6, label %exit1.loopexit ; EPILOG: loop_latch.6: ; EPILOG-NEXT: %iv_next.6 = add nuw nsw i64 %iv_next.5, 1 ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i64 %niter.next.5, 1 @@ -5400,18 +5398,19 @@ ; EPILOG-BLOCK-LABEL: @test10( ; EPILOG-BLOCK-NEXT: entry: ; EPILOG-BLOCK-NEXT: %0 = add i64 %trip, -1 -; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %trip, 1 -; EPILOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1 -; EPILOG-BLOCK-NEXT: br i1 %1, label %exit2.unr-lcssa, label %entry.new +; EPILOG-BLOCK-NEXT: %umin = call i64 @llvm.umin.i64(i64 %trip2, i64 %0) +; EPILOG-BLOCK-NEXT: %1 = add i64 %umin, 1 +; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %1, 1 +; EPILOG-BLOCK-NEXT: %2 = icmp ult i64 %umin, 1 +; EPILOG-BLOCK-NEXT: br i1 %2, label %exit2.unr-lcssa, label %entry.new ; EPILOG-BLOCK: entry.new: -; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %trip, %xtraiter +; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %1, %xtraiter ; EPILOG-BLOCK-NEXT: br label %loop_header ; EPILOG-BLOCK: loop_header: ; EPILOG-BLOCK-NEXT: %iv = phi i64 [ 0, %entry.new ], [ %iv_next.1, %loop_latch.1 ] ; EPILOG-BLOCK-NEXT: %niter = phi i64 [ 0, %entry.new ], [ %niter.next.1, %loop_latch.1 ] ; EPILOG-BLOCK-NEXT: call void @bar() -; EPILOG-BLOCK-NEXT: %cmp_early = icmp ne i64 %iv, %trip2 -; EPILOG-BLOCK-NEXT: br i1 %cmp_early, label %loop_latch, label %exit1.loopexit +; EPILOG-BLOCK-NEXT: br i1 true, label %loop_latch, label %exit1.loopexit ; EPILOG-BLOCK: loop_latch: ; EPILOG-BLOCK-NEXT: %iv_next = add nuw nsw 
i64 %iv, 1 ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i64 %niter, 1 @@ -5865,66 +5864,61 @@ ; EPILOG-LABEL: @test12( ; EPILOG-NEXT: entry: ; EPILOG-NEXT: %0 = add i64 %trip, -1 -; EPILOG-NEXT: %xtraiter = and i64 %trip, 7 -; EPILOG-NEXT: %1 = icmp ult i64 %0, 7 -; EPILOG-NEXT: br i1 %1, label %exit1.unr-lcssa, label %entry.new +; EPILOG-NEXT: %umin = call i64 @llvm.umin.i64(i64 %trip2, i64 %0) +; EPILOG-NEXT: %1 = add i64 %umin, 1 +; EPILOG-NEXT: %xtraiter = and i64 %1, 7 +; EPILOG-NEXT: %2 = icmp ult i64 %umin, 7 +; EPILOG-NEXT: br i1 %2, label %exit1.unr-lcssa, label %entry.new ; EPILOG: entry.new: -; EPILOG-NEXT: %unroll_iter = sub i64 %trip, %xtraiter +; EPILOG-NEXT: %unroll_iter = sub i64 %1, %xtraiter ; EPILOG-NEXT: br label %loop_header ; EPILOG: loop_header: ; EPILOG-NEXT: %iv = phi i64 [ 0, %entry.new ], [ %iv_next.7, %loop_latch.7 ] ; EPILOG-NEXT: %niter = phi i64 [ 0, %entry.new ], [ %niter.next.7, %loop_latch.7 ] ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: %cmp_early = icmp ne i64 %iv, %trip2 -; EPILOG-NEXT: br i1 %cmp_early, label %loop_exiting_bb2, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br i1 true, label %loop_exiting_bb2, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2: ; EPILOG-NEXT: br i1 %cond, label %loop_latch, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_latch: ; EPILOG-NEXT: %iv_next = add nuw nsw i64 %iv, 1 ; EPILOG-NEXT: %niter.next = add nuw nsw i64 %niter, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: %cmp_early.1 = icmp ne i64 %iv_next, %trip2 -; EPILOG-NEXT: br i1 %cmp_early.1, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br i1 true, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.1: ; EPILOG-NEXT: br i1 %cond, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_latch.1: ; EPILOG-NEXT: %iv_next.1 = add nuw nsw i64 %iv_next, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 ; EPILOG-NEXT: 
call void @bar() -; EPILOG-NEXT: %cmp_early.2 = icmp ne i64 %iv_next.1, %trip2 -; EPILOG-NEXT: br i1 %cmp_early.2, label %loop_exiting_bb2.2, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br i1 true, label %loop_exiting_bb2.2, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.2: ; EPILOG-NEXT: br i1 %cond, label %loop_latch.2, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_latch.2: ; EPILOG-NEXT: %iv_next.2 = add nuw nsw i64 %iv_next.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: %cmp_early.3 = icmp ne i64 %iv_next.2, %trip2 -; EPILOG-NEXT: br i1 %cmp_early.3, label %loop_exiting_bb2.3, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br i1 true, label %loop_exiting_bb2.3, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.3: ; EPILOG-NEXT: br i1 %cond, label %loop_latch.3, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_latch.3: ; EPILOG-NEXT: %iv_next.3 = add nuw nsw i64 %iv_next.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: %cmp_early.4 = icmp ne i64 %iv_next.3, %trip2 -; EPILOG-NEXT: br i1 %cmp_early.4, label %loop_exiting_bb2.4, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br i1 true, label %loop_exiting_bb2.4, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.4: ; EPILOG-NEXT: br i1 %cond, label %loop_latch.4, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_latch.4: ; EPILOG-NEXT: %iv_next.4 = add nuw nsw i64 %iv_next.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: %cmp_early.5 = icmp ne i64 %iv_next.4, %trip2 -; EPILOG-NEXT: br i1 %cmp_early.5, label %loop_exiting_bb2.5, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br i1 true, label %loop_exiting_bb2.5, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.5: ; EPILOG-NEXT: br i1 %cond, label %loop_latch.5, label 
%exit1.epilog-lcssa.loopexit ; EPILOG: loop_latch.5: ; EPILOG-NEXT: %iv_next.5 = add nuw nsw i64 %iv_next.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: %cmp_early.6 = icmp ne i64 %iv_next.5, %trip2 -; EPILOG-NEXT: br i1 %cmp_early.6, label %loop_exiting_bb2.6, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br i1 true, label %loop_exiting_bb2.6, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.6: ; EPILOG-NEXT: br i1 %cond, label %loop_latch.6, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_latch.6: @@ -5975,18 +5969,19 @@ ; EPILOG-BLOCK-LABEL: @test12( ; EPILOG-BLOCK-NEXT: entry: ; EPILOG-BLOCK-NEXT: %0 = add i64 %trip, -1 -; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %trip, 1 -; EPILOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1 -; EPILOG-BLOCK-NEXT: br i1 %1, label %exit1.unr-lcssa, label %entry.new +; EPILOG-BLOCK-NEXT: %umin = call i64 @llvm.umin.i64(i64 %trip2, i64 %0) +; EPILOG-BLOCK-NEXT: %1 = add i64 %umin, 1 +; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %1, 1 +; EPILOG-BLOCK-NEXT: %2 = icmp ult i64 %umin, 1 +; EPILOG-BLOCK-NEXT: br i1 %2, label %exit1.unr-lcssa, label %entry.new ; EPILOG-BLOCK: entry.new: -; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %trip, %xtraiter +; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %1, %xtraiter ; EPILOG-BLOCK-NEXT: br label %loop_header ; EPILOG-BLOCK: loop_header: ; EPILOG-BLOCK-NEXT: %iv = phi i64 [ 0, %entry.new ], [ %iv_next.1, %loop_latch.1 ] ; EPILOG-BLOCK-NEXT: %niter = phi i64 [ 0, %entry.new ], [ %niter.next.1, %loop_latch.1 ] ; EPILOG-BLOCK-NEXT: call void @bar() -; EPILOG-BLOCK-NEXT: %cmp_early = icmp ne i64 %iv, %trip2 -; EPILOG-BLOCK-NEXT: br i1 %cmp_early, label %loop_exiting_bb2, label %exit1.epilog-lcssa.loopexit +; EPILOG-BLOCK-NEXT: br i1 true, label %loop_exiting_bb2, label %exit1.epilog-lcssa.loopexit ; EPILOG-BLOCK: loop_exiting_bb2: ; EPILOG-BLOCK-NEXT: br i1 %cond, label %loop_latch, label %exit1.epilog-lcssa.loopexit ; 
EPILOG-BLOCK: loop_latch: @@ -6201,18 +6196,19 @@ ; EPILOG-LABEL: @test13( ; EPILOG-NEXT: entry: ; EPILOG-NEXT: %0 = add i64 %trip, -1 -; EPILOG-NEXT: %xtraiter = and i64 %trip, 7 -; EPILOG-NEXT: %1 = icmp ult i64 %0, 7 -; EPILOG-NEXT: br i1 %1, label %exit1.unr-lcssa, label %entry.new +; EPILOG-NEXT: %umin = call i64 @llvm.umin.i64(i64 %trip2, i64 %0) +; EPILOG-NEXT: %1 = add i64 %umin, 1 +; EPILOG-NEXT: %xtraiter = and i64 %1, 7 +; EPILOG-NEXT: %2 = icmp ult i64 %umin, 7 +; EPILOG-NEXT: br i1 %2, label %exit1.unr-lcssa, label %entry.new ; EPILOG: entry.new: -; EPILOG-NEXT: %unroll_iter = sub i64 %trip, %xtraiter +; EPILOG-NEXT: %unroll_iter = sub i64 %1, %xtraiter ; EPILOG-NEXT: br label %loop_header ; EPILOG: loop_header: ; EPILOG-NEXT: %iv = phi i64 [ 0, %entry.new ], [ %iv_next.7, %loop_latch.7 ] ; EPILOG-NEXT: %niter = phi i64 [ 0, %entry.new ], [ %niter.next.7, %loop_latch.7 ] ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: %cmp_early = icmp ne i64 %iv, %trip2 -; EPILOG-NEXT: br i1 %cmp_early, label %loop_exiting_bb2, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br i1 true, label %loop_exiting_bb2, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2: ; EPILOG-NEXT: %unknown = call i1 @unknown_cond() ; EPILOG-NEXT: br i1 %unknown, label %loop_latch, label %exit1.epilog-lcssa.loopexit @@ -6220,8 +6216,7 @@ ; EPILOG-NEXT: %iv_next = add nuw nsw i64 %iv, 1 ; EPILOG-NEXT: %niter.next = add nuw nsw i64 %niter, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: %cmp_early.1 = icmp ne i64 %iv_next, %trip2 -; EPILOG-NEXT: br i1 %cmp_early.1, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br i1 true, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.1: ; EPILOG-NEXT: %unknown.1 = call i1 @unknown_cond() ; EPILOG-NEXT: br i1 %unknown.1, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit @@ -6229,8 +6224,7 @@ ; EPILOG-NEXT: %iv_next.1 = add nuw nsw i64 %iv_next, 1 ; EPILOG-NEXT: 
%niter.next.1 = add nuw nsw i64 %niter.next, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: %cmp_early.2 = icmp ne i64 %iv_next.1, %trip2 -; EPILOG-NEXT: br i1 %cmp_early.2, label %loop_exiting_bb2.2, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br i1 true, label %loop_exiting_bb2.2, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.2: ; EPILOG-NEXT: %unknown.2 = call i1 @unknown_cond() ; EPILOG-NEXT: br i1 %unknown.2, label %loop_latch.2, label %exit1.epilog-lcssa.loopexit @@ -6238,8 +6232,7 @@ ; EPILOG-NEXT: %iv_next.2 = add nuw nsw i64 %iv_next.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: %cmp_early.3 = icmp ne i64 %iv_next.2, %trip2 -; EPILOG-NEXT: br i1 %cmp_early.3, label %loop_exiting_bb2.3, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br i1 true, label %loop_exiting_bb2.3, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.3: ; EPILOG-NEXT: %unknown.3 = call i1 @unknown_cond() ; EPILOG-NEXT: br i1 %unknown.3, label %loop_latch.3, label %exit1.epilog-lcssa.loopexit @@ -6247,8 +6240,7 @@ ; EPILOG-NEXT: %iv_next.3 = add nuw nsw i64 %iv_next.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: %cmp_early.4 = icmp ne i64 %iv_next.3, %trip2 -; EPILOG-NEXT: br i1 %cmp_early.4, label %loop_exiting_bb2.4, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br i1 true, label %loop_exiting_bb2.4, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.4: ; EPILOG-NEXT: %unknown.4 = call i1 @unknown_cond() ; EPILOG-NEXT: br i1 %unknown.4, label %loop_latch.4, label %exit1.epilog-lcssa.loopexit @@ -6256,8 +6248,7 @@ ; EPILOG-NEXT: %iv_next.4 = add nuw nsw i64 %iv_next.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: %cmp_early.5 = icmp ne i64 %iv_next.4, %trip2 -; EPILOG-NEXT: br i1 %cmp_early.5, label %loop_exiting_bb2.5, 
label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br i1 true, label %loop_exiting_bb2.5, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.5: ; EPILOG-NEXT: %unknown.5 = call i1 @unknown_cond() ; EPILOG-NEXT: br i1 %unknown.5, label %loop_latch.5, label %exit1.epilog-lcssa.loopexit @@ -6265,8 +6256,7 @@ ; EPILOG-NEXT: %iv_next.5 = add nuw nsw i64 %iv_next.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: %cmp_early.6 = icmp ne i64 %iv_next.5, %trip2 -; EPILOG-NEXT: br i1 %cmp_early.6, label %loop_exiting_bb2.6, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br i1 true, label %loop_exiting_bb2.6, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.6: ; EPILOG-NEXT: %unknown.6 = call i1 @unknown_cond() ; EPILOG-NEXT: br i1 %unknown.6, label %loop_latch.6, label %exit1.epilog-lcssa.loopexit @@ -6320,18 +6310,19 @@ ; EPILOG-BLOCK-LABEL: @test13( ; EPILOG-BLOCK-NEXT: entry: ; EPILOG-BLOCK-NEXT: %0 = add i64 %trip, -1 -; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %trip, 1 -; EPILOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1 -; EPILOG-BLOCK-NEXT: br i1 %1, label %exit1.unr-lcssa, label %entry.new +; EPILOG-BLOCK-NEXT: %umin = call i64 @llvm.umin.i64(i64 %trip2, i64 %0) +; EPILOG-BLOCK-NEXT: %1 = add i64 %umin, 1 +; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %1, 1 +; EPILOG-BLOCK-NEXT: %2 = icmp ult i64 %umin, 1 +; EPILOG-BLOCK-NEXT: br i1 %2, label %exit1.unr-lcssa, label %entry.new ; EPILOG-BLOCK: entry.new: -; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %trip, %xtraiter +; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %1, %xtraiter ; EPILOG-BLOCK-NEXT: br label %loop_header ; EPILOG-BLOCK: loop_header: ; EPILOG-BLOCK-NEXT: %iv = phi i64 [ 0, %entry.new ], [ %iv_next.1, %loop_latch.1 ] ; EPILOG-BLOCK-NEXT: %niter = phi i64 [ 0, %entry.new ], [ %niter.next.1, %loop_latch.1 ] ; EPILOG-BLOCK-NEXT: call void @bar() -; EPILOG-BLOCK-NEXT: %cmp_early = icmp ne i64 %iv, %trip2 -; EPILOG-BLOCK-NEXT: br i1 
%cmp_early, label %loop_exiting_bb2, label %exit1.epilog-lcssa.loopexit +; EPILOG-BLOCK-NEXT: br i1 true, label %loop_exiting_bb2, label %exit1.epilog-lcssa.loopexit ; EPILOG-BLOCK: loop_exiting_bb2: ; EPILOG-BLOCK-NEXT: %unknown = call i1 @unknown_cond() ; EPILOG-BLOCK-NEXT: br i1 %unknown, label %loop_latch, label %exit1.epilog-lcssa.loopexit