Index: lib/Transforms/Utils/LoopUnrollRuntime.cpp
===================================================================
--- lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -898,7 +898,19 @@
     // Update counter in loop for unrolling.
     // I should be multiply of Count.
     IRBuilder<> B2(NewPreHeader->getTerminator());
-    Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
+    // TripCount & ~ModVal equals TripCount - ModVal only if every set bit of
+    // ModVal is also set in TripCount. That holds when ModVal was formed as
+    // TripCount & (Count - 1); keep the subtraction for any other Count.
+    // NOTE(review): assumes isPowerOf2_32 (llvm/Support/MathExtras.h) is
+    // already visible in this file, and that a non-power-of-2 Count can reach
+    // this point with a division-based ModVal -- confirm against the full file.
+    Value *TestVal;
+    if (isPowerOf2_32(Count)) {
+      Value *Mask = B2.CreateNot(ModVal);
+      TestVal = B2.CreateAnd(TripCount, Mask, "unroll_iter");
+    } else {
+      TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
+    }
     BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
     B2.SetInsertPoint(LatchBR);
     PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
Index: test/Transforms/LoopUnroll/nuw-backedge.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopUnroll/nuw-backedge.ll
@@ -0,0 +1,48 @@
+; RUN: opt -loop-unroll -unroll-runtime -unroll-count=4 %s -S -o - | FileCheck %s
+
+; CHECK-LABEL: nuw_trip_count
+; CHECK: entry:
+; CHECK: [[LOOP_GUARD:%[^ ]+]] = icmp eq i32 %N, 0
+; CHECK: br i1 [[LOOP_GUARD]], label {{.*}}, label %for.body.preheader
+
+; CHECK: for.body.preheader:
+; CHECK: [[BECOUNT:%[^ ]+]] = add i32 %N, -1
+; CHECK: %xtraiter = and i32 %N, 3
+; CHECK: [[UNROLL_GUARD:%[^ ]+]] = icmp ult i32 [[BECOUNT]], 3
+; CHECK: br i1 [[UNROLL_GUARD]], label {{.*}}, label %for.body.preheader.new
+
+; CHECK: for.body.preheader.new:
+; CHECK: [[XOR:%[^ ]+]] = xor i32 %xtraiter, -1
+; CHECK: %unroll_iter = and i32 %N, [[XOR]]
+; CHECK: br label %for.body
+
+; CHECK: for.body:
+; CHECK: %niter = phi i32 [ %unroll_iter, %for.body.preheader.new ], [ [[NSUB3:%[^ ]+]], %for.body ]
+; CHECK: [[NSUB:%[^ ]+]] = sub i32 %niter, 1
+; CHECK: [[NSUB1:%[^ ]+]] = sub i32 [[NSUB]], 1
+; CHECK: [[NSUB2:%[^ ]+]] = sub i32 [[NSUB1]], 1
+; CHECK: [[NSUB3:%[^ ]+]] = sub i32 [[NSUB2]], 1
+; CHECK: [[NCMP:%[^ ]+]] = icmp eq i32 [[NSUB3]], 0
+; CHECK: br i1 [[NCMP]], label {{.*}}, label %for.body
+
+define void @nuw_trip_count(i32* nocapture readonly %a, i32* nocapture readonly %b, i32* noalias nocapture %c, i32 %N) {
+entry:
+  %cmp8 = icmp eq i32 %N, 0
+  br i1 %cmp8, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.09
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32* %b, i32 %i.09
+  %1 = load i32, i32* %arrayidx1, align 4
+  %mul = mul nsw i32 %1, %0
+  %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %i.09
+  store i32 %mul, i32* %arrayidx2, align 4
+  %inc = add nuw i32 %i.09, 1
+  %exitcond = icmp eq i32 %inc, %N
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
Index: test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
===================================================================
--- test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
+++ test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
@@ -19,7 +19,7 @@
 ; EPILOG-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
 ; EPILOG-NEXT: br i1 [[TMP1]], label %exit2.loopexit.unr-lcssa, label [[ENTRY_NEW:%.*]]
 ; EPILOG: entry.new:
-; EPILOG-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TRIP]], [[XTRAITER]]
+; EPILOG-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TRIP]], -8
 ; EPILOG-NEXT: br label [[LOOP_HEADER:%.*]]
 ; EPILOG: loop_latch.epil:
 ; EPILOG-NEXT: %epil.iter.sub = add i64 %epil.iter, -1
@@ -147,7 +147,7 @@
 ; EPILOG-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
 ; EPILOG-NEXT: br i1 [[TMP1]], label %exit2.loopexit.unr-lcssa, label [[ENTRY_NEW:%.*]]
 ; EPILOG: entry.new:
-; EPILOG-NEXT: %unroll_iter = sub i64 [[TRIP]], [[XTRAITER]]
+; EPILOG-NEXT: %unroll_iter = and i64 [[TRIP]], -8
 ; EPILOG-NEXT: br label [[LOOP_HEADER:%.*]]
 ; EPILOG: loop_header:
 ; EPILOG-NEXT: %sum = phi i64 [ 0, %entry.new ], [ %sum.next.7, %loop_latch.7 ]
Index: test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
===================================================================
--- test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
+++ test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
@@ -21,7 +21,7 @@
 ; CHECK: br i1 [[CMP]], label %[[CLEANUP:.*]], label %for.body.lr.ph.new
 
 ; CHECK-LABEL: for.body.lr.ph.new:
-; CHECK: %unroll_iter = sub nsw i64 %wide.trip.count, %xtraiter
+; CHECK: %unroll_iter = and i64 %wide.trip.count, 4294967292
 ; CHECK: br label %for.body
 
 ; CHECK: [[CLEANUP]]:
Index: test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
===================================================================
--- test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
+++ test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
@@ -16,7 +16,8 @@
 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3
 ; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_OUTER_PREHEADER_NEW:%.*]]
 ; CHECK: for.outer.preheader.new:
-; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i32 [[I]], [[XTRAITER]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[XTRAITER]], -1
+; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i32 [[I]], [[XOR]]
 ; CHECK-NEXT: br label [[FOR_OUTER:%.*]]
 ; CHECK: for.outer:
 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[ADD8_3:%.*]], [[FOR_LATCH:%.*]] ], [ 0, [[FOR_OUTER_PREHEADER_NEW]] ]
@@ -192,13 +193,13 @@
 ; CHECK: br label %for.inner
 ; CHECK: for.inner:
 ; CHECK: %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
-; CHECK: %sum = phi i32 [ %2, %for.outer ], [ %add, %for.inner ]
+; CHECK: %sum = phi i32 [ %3, %for.outer ], [ %add, %for.inner ]
 ; CHECK: %j.1 = phi i32 [ 0, %for.outer ], [ %inc.1, %for.inner ]
-; CHECK: %sum.1 = phi i32 [ %3, %for.outer ], [ %add.1, %for.inner ]
+; CHECK: %sum.1 = phi i32 [ %4, %for.outer ], [ %add.1, %for.inner ]
 ; CHECK: %j.2 = phi i32 [ 0, %for.outer ], [ %inc.2, %for.inner ]
-; CHECK: %sum.2 = phi i32 [ %4, %for.outer ], [ %add.2, %for.inner ]
+; CHECK: %sum.2 = phi i32 [ %5, %for.outer ], [ %add.2, %for.inner ]
 ; CHECK: %j.3 = phi i32 [ 0, %for.outer ], [ %inc.3, %for.inner ]
-; CHECK: %sum.3 = phi i32 [ %5, %for.outer ], [ %add.3, %for.inner ]
+; CHECK: %sum.3 = phi i32 [ %6, %for.outer ], [ %add.3, %for.inner ]
 ; CHECK: br i1 %exitcond.3, label %for.latch, label %for.inner
 ; CHECK: for.latch:
 ; CHECK: %add.lcssa = phi i32 [ %add, %for.inner ]
Index: test/Transforms/PhaseOrdering/reassociate-after-unroll.ll
===================================================================
--- test/Transforms/PhaseOrdering/reassociate-after-unroll.ll
+++ test/Transforms/PhaseOrdering/reassociate-after-unroll.ll
@@ -19,7 +19,7 @@
 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
 ; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_LR_PH_NEW:%.*]]
 ; CHECK: for.body.lr.ph.new:
-; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[LIMIT]], [[XTRAITER]]
+; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[LIMIT]], -8
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK: for.cond.cleanup.loopexit.unr-lcssa:
 ; CHECK-NEXT: [[ADD_LCSSA_PH:%.*]] = phi i64 [ undef, [[FOR_BODY_LR_PH]] ], [ [[ADD_7:%.*]], [[FOR_BODY]] ]
@@ -66,7 +66,7 @@
 ; NPM-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
 ; NPM-NEXT: br i1 [[TMP1]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_LR_PH_NEW:%.*]]
 ; NPM: for.body.lr.ph.new:
-; NPM-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[LIMIT]], [[XTRAITER]]
+; NPM-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[LIMIT]], -8
 ; NPM-NEXT: [[AND_0:%.*]] = and i64 [[CONV]], 1
 ; NPM-NEXT: br label [[FOR_BODY:%.*]]
 ; NPM: for.cond.cleanup.loopexit.unr-lcssa: