diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -11792,9 +11792,34 @@ SmallVector Operands{AR->operands()}; Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags); - - setNoWrapFlags(const_cast(AR), Flags); } + + auto canProveNUW = [&]() { + if (!isLoopInvariant(RHS, L)) + return false; + + if (!isKnownNonZero(AR->getStepRecurrence(*this))) + // We need the sequence defined by AR to strictly increase in the + // unsigned integer domain for the logic below to hold. + return false; + + const unsigned InnerBitWidth = getTypeSizeInBits(AR->getType()); + const unsigned OuterBitWidth = getTypeSizeInBits(RHS->getType()); + // If RHS <=u Limit, then there must exist a value V in the sequence + // defined by AR (e.g. {Start,+,Step}) such that V >u RHS, and + // V <=u UINT_MAX. Thus, we must exit the loop before unsigned + // overflow occurs. This limit also implies that a signed comparison + // (in the wide bitwidth) is equivalent to an unsigned comparison as + // the high bits on both sides must be zero. + APInt StrideMax = getUnsignedRangeMax(AR->getStepRecurrence(*this)); + APInt Limit = APInt::getMaxValue(InnerBitWidth) - (StrideMax - 1); + Limit = Limit.zext(OuterBitWidth); + return getUnsignedRangeMax(applyLoopGuards(RHS, L)).ule(Limit); + }; + if (!hasFlags(Flags, SCEV::FlagNUW) && canProveNUW()) + Flags = setFlags(Flags, SCEV::FlagNUW); + + setNoWrapFlags(const_cast(AR), Flags); if (AR->hasNoUnsignedWrap()) { // Emulate what getZeroExtendExpr would have done during construction // if we'd been able to infer the fact just above at that time. diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll --- a/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll @@ -279,11 +279,11 @@ define void @rhs_narrow_range(i16 %n.raw) { ; CHECK-LABEL: 'rhs_narrow_range' ; CHECK-NEXT: Determining loop execution counts for: @rhs_narrow_range -; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. -; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + (1 umax (2 * (zext i7 (trunc i16 (%n.raw /u 2) to i7) to i16)))) +; CHECK-NEXT: Loop %for.body: max backedge-taken count is 253 ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + (1 umax (2 * (zext i7 (trunc i16 (%n.raw /u 2) to i7) to i16)))) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: +; CHECK: Loop %for.body: Trip multiple is 1 ; entry: %n = and i16 %n.raw, 254 @@ -301,6 +301,150 @@ ret void } +define void @ugt_constant_rhs(i16 %n.raw, i8 %start) mustprogress { +; +; CHECK-LABEL: 'ugt_constant_rhs' +; CHECK-NEXT: Determining loop execution counts for: @ugt_constant_rhs +; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %for.body: Unpredictable predicated backedge-taken count. +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ %start, %entry ] + %iv.next = add i8 %iv, 1 + %zext = zext i8 %iv.next to i16 + %cmp = icmp ugt i16 %zext, 254 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @ult_constant_rhs(i16 %n.raw, i8 %start) { +; +; CHECK-LABEL: 'ult_constant_rhs' +; CHECK-NEXT: Determining loop execution counts for: @ult_constant_rhs +; CHECK-NEXT: Loop %for.body: backedge-taken count is (255 + (-1 * (zext i8 (1 + %start) to i16))) +; CHECK-NEXT: Loop %for.body: max backedge-taken count is 255 +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (255 + (-1 * (zext i8 (1 + %start) to i16))) +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 1 +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ %start, %entry ] + %iv.next = add i8 %iv, 1 + %zext = zext i8 %iv.next to i16 + %cmp = icmp ult i16 %zext, 255 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @ult_constant_rhs_stride2(i16 %n.raw, i8 %start) { +; +; CHECK-LABEL: 'ult_constant_rhs_stride2' +; CHECK-NEXT: Determining loop execution counts for: @ult_constant_rhs_stride2 +; CHECK-NEXT: Loop %for.body: backedge-taken count is ((1 + (-1 * (zext i8 (2 + %start) to i16)) + (254 umax (zext i8 (2 + %start) to i16))) /u 2) +; CHECK-NEXT: Loop %for.body: max backedge-taken count is 127 +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is ((1 + (-1 * (zext i8 (2 + %start) to i16)) + (254 umax (zext i8 (2 + %start) to i16))) /u 2) +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 1 +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ %start, %entry ] + %iv.next = add i8 %iv, 2 + %zext = zext i8 %iv.next to i16 + %cmp = icmp ult i16 %zext, 254 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @ult_constant_rhs_stride2_neg(i16 %n.raw, i8 %start) { +; +; CHECK-LABEL: 'ult_constant_rhs_stride2_neg' +; CHECK-NEXT: Determining loop execution counts for: @ult_constant_rhs_stride2_neg +; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is ((256 + (-1 * (zext i8 (2 + %start) to i16))) /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {(2 + %start),+,2}<%for.body> Added Flags: +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ %start, %entry ] + %iv.next = add i8 %iv, 2 + %zext = zext i8 %iv.next to i16 + %cmp = icmp ult i16 %zext, 255 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +define void @ult_restricted_rhs(i16 %n.raw) { +; CHECK-LABEL: 'ult_restricted_rhs' +; CHECK-NEXT: Determining loop execution counts for: @ult_restricted_rhs +; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + (1 umax (zext i8 (trunc i16 %n.raw to i8) to i16))) +; CHECK-NEXT: Loop %for.body: max backedge-taken count is 254 +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + (1 umax (zext i8 (trunc i16 %n.raw to i8) to i16))) +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 1 +; +entry: + %n = and i16 %n.raw, 255 + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ 0, %entry ] + %iv.next = add i8 %iv, 1 + %zext = zext i8 %iv.next to i16 + %cmp = icmp ult i16 %zext, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @ult_guarded_rhs(i16 %n) {; +; CHECK-LABEL: 'ult_guarded_rhs' +; CHECK-NEXT: Determining loop execution counts for: @ult_guarded_rhs +; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + (1 umax %n)) +; CHECK-NEXT: Loop %for.body: max backedge-taken count is -2 +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + (1 umax %n)) +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 1 +; +entry: + %in_range = icmp ult i16 %n, 256 + br i1 %in_range, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ 0, %entry ] + %iv.next = add i8 %iv, 1 + %zext = zext i8 %iv.next to i16 + %cmp = icmp ult i16 %zext, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + declare void @llvm.assume(i1) diff --git a/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll b/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll --- a/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll +++ b/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll @@ -129,13 +129,13 @@ ; CHECK-LABEL: @slt_non_constant_rhs_no_mustprogress( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[N:%.*]] = and i16 [[N_RAW:%.*]], 255 -; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 [[N]] to i8 +; CHECK-NEXT: [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[N]], i16 1) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP0]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i16 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i16 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i16 [[INDVARS_IV_NEXT]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -932,17 +932,18 @@ define i16 @ult_multiuse_profit(i16 %n.raw, i8 %start) mustprogress { ; CHECK-LABEL: @ult_multiuse_profit( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 254 to i8 +; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[START:%.*]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i16 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i16 254 to i8 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[ZEXT_LCSSA:%.*]] = phi i16 [ [[ZEXT]], [[FOR_BODY]] ] -; CHECK-NEXT: ret i16 [[ZEXT_LCSSA]] +; CHECK-NEXT: [[UMAX:%.*]] = call i16 @llvm.umax.i16(i16 [[TMP1]], i16 254) +; CHECK-NEXT: ret i16 [[UMAX]] ; entry: br label %for.body @@ -993,13 +994,13 @@ ; CHECK-LABEL: @slt_restricted_rhs( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[N:%.*]] = and i16 [[N_RAW:%.*]], 255 -; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 [[N]] to i8 +; CHECK-NEXT: [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[N]], i16 1) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP0]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i16 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i16 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i16 [[INDVARS_IV_NEXT]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ;