Index: llvm/lib/Analysis/ScalarEvolution.cpp =================================================================== --- llvm/lib/Analysis/ScalarEvolution.cpp +++ llvm/lib/Analysis/ScalarEvolution.cpp @@ -11606,6 +11606,15 @@ const SCEVAddRecExpr *IV = dyn_cast(LHS); bool PredicatedIV = false; + /// Return true if we can prove that this exit must taken on some iteration + /// of the loop. + auto exitMustBeTaken = [&]() { + if (!ControlsExit || !loopHasNoAbnormalExits(L)) + return false; + + return loopIsFiniteByAssumption(L); + }; + auto canAssumeNoSelfWrap = [&](const SCEVAddRecExpr *AR) { // Can we prove this loop *must* be UB if overflow of IV occurs? // Reasoning goes as follows: @@ -11628,10 +11637,7 @@ if (!StrideC || !StrideC->getAPInt().isPowerOf2()) return false; - if (!ControlsExit || !loopHasNoAbnormalExits(L)) - return false; - - return loopIsFiniteByAssumption(L); + return exitMustBeTaken(); }; if (!IV) { @@ -11644,9 +11650,42 @@ SmallVector Operands{AR->operands()}; Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags); - - setNoWrapFlags(const_cast(AR), Flags); } + + auto canProveNUW = [&]() { + if (!isLoopInvariant(RHS, L)) + return false; + + if (getUnsignedRangeMin(AR->getStepRecurrence(*this)).isZero()) + // We need the sequence defined by AR to strictly increase in the + // unsigned integer domain for the logic below to hold. + return false; + + const unsigned InnerBitWidth = getTypeSizeInBits(AR->getType()); + const unsigned OuterBitWidth = getTypeSizeInBits(RHS->getType()); + // If RHS <= Limit, then there must exist a value in the sequence + // defined by AR (e.g. {Start,+,Step}) > RHS, and <= UINT_MAX. Thus, + // we must exit the loop before unsigned overflow occurs. + // NOTE: The logic here closely parallels computeMaxBECountForLT, we + // may be able to common this in some way. + APInt StrideMax = getUnsignedRangeMax(AR->getStepRecurrence(*this)); + APInt Limit = APInt::getMaxValue(InnerBitWidth) - StrideMax; + Limit = Limit.zext(OuterBitWidth); + auto RHSCR = getUnsignedRange(RHS); + if (exitMustBeTaken()) { + // If the exit must be taken, then we know that RHS can't take a + // value which can't be represented by the extension of the narrow + // IV. Otherwise, we'd have UB. + auto FullCR = ConstantRange::getFull(InnerBitWidth); + FullCR = FullCR.zeroExtend(OuterBitWidth); + RHSCR = RHSCR.intersectWith(FullCR, ConstantRange::Unsigned); + } + return RHSCR.getUnsignedMax().ule(Limit); + }; + if (!hasFlags(Flags, SCEV::FlagNUW) && canProveNUW()) + Flags = setFlags(Flags, SCEV::FlagNUW); + + setNoWrapFlags(const_cast(AR), Flags); if (AR->hasNoUnsignedWrap()) { // Emulate what getZeroExtendExpr would have done during construction // if we'd been able to infer the fact just above at that time. Index: llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll =================================================================== --- llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll +++ llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll @@ -100,5 +100,112 @@ ret void } +; CHECK: Determining loop execution counts for: @rhs_mustexit_1 +; CHECK: Loop %for.body: backedge-taken count is (-1 + (1 umax (-1 + (zext i8 (trunc i16 %n.raw to i8) to i16)))) +; CHECK: Loop %for.body: max backedge-taken count is -2 +define void @rhs_mustexit_1(i16 %n.raw) mustprogress { +entry: + %n.and = and i16 %n.raw, 255 + %n = add nsw i16 %n.and, -1 + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ 0, %entry ] + %iv.next = add i8 %iv, 1 + store i8 %iv, i8* @G + %zext = zext i8 %iv.next to i16 + %cmp = icmp ult i16 %zext, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +; CHECK: Determining loop execution counts for: @rhs_mustexit_3 +; CHECK: Loop %for.body: backedge-taken count is ((-1 + (3 umax (-3 + (zext i8 (trunc i16 %n.raw to i8) to i16)))) /u 3) +; CHECK: Loop %for.body: max backedge-taken count is 21844 +define void @rhs_mustexit_3(i16 %n.raw) mustprogress { +entry: + %n.and = and i16 %n.raw, 255 + %n = add nsw i16 %n.and, -3 + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ 0, %entry ] + %iv.next = add i8 %iv, 3 + store i8 %iv, i8* @G + %zext = zext i8 %iv.next to i16 + %cmp = icmp ult i16 %zext, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +; CHECK: Determining loop execution counts for: @neg_rhs_wrong_range +; CHECK: Loop %for.body: Unpredictable backedge-taken count. +; CHECK: Loop %for.body: Unpredictable max backedge-taken count. +define void @neg_rhs_wrong_range(i16 %n.raw) mustprogress { +entry: + %n.and = and i16 %n.raw, 255 + %n = add nsw i16 %n.and, -1 + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ 0, %entry ] + %iv.next = add i8 %iv, 2 + store i8 %iv, i8* @G + %zext = zext i8 %iv.next to i16 + %cmp = icmp ult i16 %zext, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +; CHECK: Determining loop execution counts for: @neg_rhs_maybe_infinite +; CHECK: Loop %for.body: Unpredictable backedge-taken count. +; CHECK: Loop %for.body: Unpredictable max backedge-taken count. +define void @neg_rhs_maybe_infinite(i16 %n.raw) { +entry: + %n.and = and i16 %n.raw, 255 + %n = add nsw i16 %n.and, -1 + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ 0, %entry ] + %iv.next = add i8 %iv, 1 + store i8 %iv, i8* @G + %zext = zext i8 %iv.next to i16 + %cmp = icmp ult i16 %zext, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +; Because of the range on RHS including only values within i8, we don't need +; the must exit property +; CHECK: Determining loop execution counts for: @rhs_narrow_range +; CHECK: Loop %for.body: backedge-taken count is (-1 + (1 umax (2 * (zext i7 (trunc i16 (%n.raw /u 2) to i7) to i16)))) +; CHECK: Loop %for.body: max backedge-taken count is 253 +define void @rhs_narrow_range(i16 %n.raw) { +entry: + %n = and i16 %n.raw, 254 + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ 0, %entry ] + %iv.next = add i8 %iv, 1 + store i8 %iv, i8* @G + %zext = zext i8 %iv.next to i16 + %cmp = icmp ult i16 %zext, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + declare void @llvm.assume(i1)