Index: llvm/lib/Analysis/ScalarEvolution.cpp =================================================================== --- llvm/lib/Analysis/ScalarEvolution.cpp +++ llvm/lib/Analysis/ScalarEvolution.cpp @@ -11606,6 +11606,15 @@ const SCEVAddRecExpr *IV = dyn_cast(LHS); bool PredicatedIV = false; + /// Return true if we can prove that this exit must taken on some iteration + /// of the loop. + auto exitMustBeTaken = [&]() { + if (!ControlsExit || !loopHasNoAbnormalExits(L)) + return false; + + return loopIsFiniteByAssumption(L); + }; + auto canAssumeNoSelfWrap = [&](const SCEVAddRecExpr *AR) { // Can we prove this loop *must* be UB if overflow of IV occurs? // Reasoning goes as follows: @@ -11628,10 +11637,7 @@ if (!StrideC || !StrideC->getAPInt().isPowerOf2()) return false; - if (!ControlsExit || !loopHasNoAbnormalExits(L)) - return false; - - return loopIsFiniteByAssumption(L); + return exitMustBeTaken(); }; if (!IV) { @@ -11644,9 +11650,42 @@ SmallVector Operands{AR->operands()}; Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags); - - setNoWrapFlags(const_cast(AR), Flags); } + + auto canProveNUW = [&]() { + if (!isLoopInvariant(RHS, L)) + return false; + + if (getUnsignedRangeMin(AR->getStepRecurrence(*this)).isZero()) + // We need the sequence defined by AR to strictly increase in the + // unsigned integer domain for the logic below to hold. + return false; + + const unsigned InnerBitWidth = getTypeSizeInBits(AR->getType()); + const unsigned OuterBitWidth = getTypeSizeInBits(RHS->getType()); + // If RHS <= Limit, then there must exist a value in the sequence + // defined by AR (e.g. {Start,+,Step}) > RHS, and <= UINT_MAX. Thus, + // we must exit the loop before unsigned overflow occurs. + // NOTE: The logic here closely parallels computeMaxBECountForLT, we + // may be able to common this in some way. + APInt StrideMax = getUnsignedRangeMax(AR->getStepRecurrence(*this)); + APInt Limit = APInt::getMaxValue(InnerBitWidth) - StrideMax; + Limit = Limit.zext(OuterBitWidth); + auto RHSCR = getUnsignedRange(RHS); + if (exitMustBeTaken()) { + // If the exit must be taken, then we know that RHS can't take a + // value which can't be represented by the extension of the narrow + // IV. Otherwise, we'd have UB. + auto FullCR = ConstantRange::getFull(InnerBitWidth); + FullCR = FullCR.zeroExtend(OuterBitWidth); + RHSCR = RHSCR.intersectWith(FullCR, ConstantRange::Unsigned); + } + return RHSCR.getUnsignedMax().ule(Limit); + }; + if (!hasFlags(Flags, SCEV::FlagNUW) && canProveNUW()) + Flags = setFlags(Flags, SCEV::FlagNUW); + + setNoWrapFlags(const_cast(AR), Flags); if (AR->hasNoUnsignedWrap()) { // Emulate what getZeroExtendExpr would have done during construction // if we'd been able to infer the fact just above at that time. Index: llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll =================================================================== --- llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll +++ llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll @@ -125,11 +125,11 @@ define void @rhs_mustexit_1(i16 %n.raw) mustprogress { ; CHECK-LABEL: 'rhs_mustexit_1' ; CHECK-NEXT: Determining loop execution counts for: @rhs_mustexit_1 -; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. -; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + (1 umax (-1 + (zext i8 (trunc i16 %n.raw to i8) to i16)))) +; CHECK-NEXT: Loop %for.body: max backedge-taken count is -2 ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + (1 umax (-1 + (zext i8 (trunc i16 %n.raw to i8) to i16)))) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: +; CHECK: Loop %for.body: Trip multiple is 1 ; entry: %n.and = and i16 %n.raw, 255 @@ -151,9 +151,11 @@ define void @rhs_mustexit_3(i16 %n.raw) mustprogress { ; CHECK-LABEL: 'rhs_mustexit_3' ; CHECK-NEXT: Determining loop execution counts for: @rhs_mustexit_3 -; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. -; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count. -; CHECK-NEXT: Loop %for.body: Unpredictable predicated backedge-taken count. +; CHECK-NEXT: Loop %for.body: backedge-taken count is ((-1 + (3 umax (-3 + (zext i8 (trunc i16 %n.raw to i8) to i16)))) /u 3) +; CHECK-NEXT: Loop %for.body: max backedge-taken count is 21844 +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is ((-1 + (3 umax (-3 + (zext i8 (trunc i16 %n.raw to i8) to i16)))) /u 3) +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 1 ; entry: %n.and = and i16 %n.raw, 255 @@ -279,11 +281,11 @@ define void @rhs_narrow_range(i16 %n.raw) { ; CHECK-LABEL: 'rhs_narrow_range' ; CHECK-NEXT: Determining loop execution counts for: @rhs_narrow_range -; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. -; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + (1 umax (2 * (zext i7 (trunc i16 (%n.raw /u 2) to i7) to i16)))) +; CHECK-NEXT: Loop %for.body: max backedge-taken count is 253 ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + (1 umax (2 * (zext i7 (trunc i16 (%n.raw /u 2) to i7) to i16)))) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: +; CHECK: Loop %for.body: Trip multiple is 1 ; entry: %n = and i16 %n.raw, 254