Index: lib/Transforms/Utils/LoopUnrollRuntime.cpp
===================================================================
--- lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -719,8 +719,24 @@
   PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
   Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
                                             PreHeaderBR);
-  Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
-                                          PreHeaderBR);
+
+  // BECount == TripCount - 1. When the loop entry guard proves
+  // TripCount > 0 (unsigned), that subtraction cannot wrap, so emit it as
+  // `sub nuw`. This must not be modelled as an add of -1 with
+  // SCEV::FlagNUW: adding all-ones wraps unsigned for every non-zero
+  // TripCount, which would make the result poison on the guarded path.
+  Value *BECount = nullptr;
+  const SCEV *ZeroSC = SE->getZero(TripCountSC->getType());
+  if (SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, TripCountSC,
+                                   ZeroSC)) {
+    IRBuilder<> BECountBuilder(PreHeaderBR);
+    BECount = BECountBuilder.CreateNUWSub(
+        TripCount, ConstantInt::get(TripCount->getType(), 1));
+  } else {
+    BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
+                                     PreHeaderBR);
+  }
   IRBuilder<> B(PreHeaderBR);
   Value *ModVal;
   // Calculate ModVal = (BECount + 1) % Count.
Index: test/Transforms/IndVarSimplify/lftr.ll
===================================================================
--- test/Transforms/IndVarSimplify/lftr.ll
+++ test/Transforms/IndVarSimplify/lftr.ll
@@ -202,7 +202,7 @@
 ; CHECK: loop.preheader:
 ; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[N]], 3
 ; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 [[TMP0]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i8 [[TMP1]], 1
 ; CHECK-NEXT: br label [[LOOP:%.*]]
 ; CHECK: loop:
 ; CHECK-NEXT: [[I1:%.*]] = phi i8 [ [[I1_INC:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
Index: test/Transforms/LoopUnroll/nuw-backedge.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopUnroll/nuw-backedge.ll
@@ -0,0 +1,32 @@
+; RUN: opt -loop-unroll -unroll-runtime -unroll-count=4 %s -S -o - | FileCheck %s
+
+; The loop is only entered when %N != 0, so the backedge-taken count %N - 1
+; must be emitted as `sub nuw`; `add nuw %N, -1` would be poison for %N != 0.
+; CHECK-LABEL: nuw_trip_count
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[BECOUNT:%[^ ]+]] = sub nuw i32 %N, 1
+; CHECK-NEXT: %xtraiter = and i32 %N, 3
+; CHECK-NEXT: [[CMP:%[^ ]+]] = icmp ult i32 [[BECOUNT]], 3
+; CHECK-NEXT: br i1 [[CMP]], label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body.preheader.new
+define void @nuw_trip_count(i32* nocapture readonly %a, i32* nocapture readonly %b, i32* noalias nocapture %c, i32 %N) {
+entry:
+  %cmp8 = icmp eq i32 %N, 0
+  br i1 %cmp8, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.09
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32* %b, i32 %i.09
+  %1 = load i32, i32* %arrayidx1, align 4
+  %mul = mul nsw i32 %1, %0
+  %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %i.09
+  store i32 %mul, i32* %arrayidx2, align 4
+  %inc = add nuw i32 %i.09, 1
+  %exitcond = icmp eq i32 %inc, %N
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
Index: test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
===================================================================
--- test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
+++ test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
@@ -11,7 +11,7 @@
 ; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP]], [[CMPJ]]
 ; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_OUTER_PREHEADER:%.*]], label [[FOR_END:%.*]]
 ; CHECK: for.outer.preheader:
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[I]], -1
+; CHECK-NEXT: [[TMP0:%.*]] = sub nuw i32 [[I]], 1
 ; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[I]], 3
 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3
 ; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_OUTER_PREHEADER_NEW:%.*]]