Index: llvm/lib/Analysis/ScalarEvolution.cpp
===================================================================
--- llvm/lib/Analysis/ScalarEvolution.cpp
+++ llvm/lib/Analysis/ScalarEvolution.cpp
@@ -11606,6 +11606,15 @@
   const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
   bool PredicatedIV = false;
 
+  /// Return true if we can prove that this exit must taken on some iteration
+  /// of the loop.
+  auto exitMustBeTaken = [&]() {
+    if (!ControlsExit || !loopHasNoAbnormalExits(L))
+      return false;
+
+    return loopIsFiniteByAssumption(L);
+  };
+
   auto canAssumeNoSelfWrap = [&](const SCEVAddRecExpr *AR) {
     // Can we prove this loop *must* be UB if overflow of IV occurs?
     // Reasoning goes as follows:
@@ -11628,10 +11637,7 @@
     if (!StrideC || !StrideC->getAPInt().isPowerOf2())
       return false;
 
-    if (!ControlsExit || !loopHasNoAbnormalExits(L))
-      return false;
-
-    return loopIsFiniteByAssumption(L);
+    return exitMustBeTaken();
   };
 
   if (!IV) {
@@ -11644,9 +11650,42 @@
 
           SmallVector<const SCEV*> Operands{AR->operands()};
           Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags);
-
-          setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), Flags);
         }
+
+        auto canProveNUW = [&]() {
+          if (!isLoopInvariant(RHS, L))
+            return false;
+
+          if (getUnsignedRangeMin(AR->getStepRecurrence(*this)).isZero())
+            // We need the sequence defined by AR to strictly increase in the
+            // unsigned integer domain for the logic below to hold.
+            return false;
+
+          const unsigned InnerBitWidth = getTypeSizeInBits(AR->getType());
+          const unsigned OuterBitWidth = getTypeSizeInBits(RHS->getType());
+          // If RHS <= Limit, then there must exist a value in the sequence
+          // defined by AR (e.g. {Start,+,Step}) > RHS, and <= UINT_MAX.  Thus,
+          // we must exit the loop before unsigned overflow occurs.
+          // NOTE: The logic here closely parallels computeMaxBECountForLT, we
+          // may be able to common this in some way.
+          APInt StrideMax = getUnsignedRangeMax(AR->getStepRecurrence(*this));
+          APInt Limit = APInt::getMaxValue(InnerBitWidth) - StrideMax;
+          Limit = Limit.zext(OuterBitWidth);
+          auto RHSCR = getUnsignedRange(RHS);
+          if (exitMustBeTaken()) {
+            // If the exit must be taken, then we know that RHS can't take a
+            // value which can't be represented by the extension of the narrow
+            // IV.  Otherwise, we'd have UB.
+            auto FullCR = ConstantRange::getFull(InnerBitWidth);
+            FullCR = FullCR.zeroExtend(OuterBitWidth);
+            RHSCR = RHSCR.intersectWith(FullCR, ConstantRange::Unsigned);
+          }
+          return RHSCR.getUnsignedMax().ule(Limit);
+        };
+        if (!hasFlags(Flags, SCEV::FlagNUW) && canProveNUW())
+          Flags = setFlags(Flags, SCEV::FlagNUW);
+
+        setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), Flags);
         if (AR->hasNoUnsignedWrap()) {
           // Emulate what getZeroExtendExpr would have done during construction
           // if we'd been able to infer the fact just above at that time.
Index: llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll
===================================================================
--- llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll
+++ llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll
@@ -125,11 +125,11 @@
 define void @rhs_mustexit_1(i16 %n.raw) mustprogress {
 ; CHECK-LABEL: 'rhs_mustexit_1'
 ; CHECK-NEXT:  Determining loop execution counts for: @rhs_mustexit_1
-; CHECK-NEXT:  Loop %for.body: Unpredictable backedge-taken count.
-; CHECK-NEXT:  Loop %for.body: Unpredictable max backedge-taken count.
+; CHECK-NEXT:  Loop %for.body: backedge-taken count is (-1 + (1 umax (-1 + (zext i8 (trunc i16 %n.raw to i8) to i16))<nsw>))
+; CHECK-NEXT:  Loop %for.body: max backedge-taken count is -2
 ; CHECK-NEXT:  Loop %for.body: Predicated backedge-taken count is (-1 + (1 umax (-1 + (zext i8 (trunc i16 %n.raw to i8) to i16))<nsw>))
 ; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:    {1,+,1}<nw><%for.body> Added Flags: <nusw>
+; CHECK:       Loop %for.body: Trip multiple is 1
 ;
 entry:
   %n.and = and i16 %n.raw, 255
@@ -151,9 +151,11 @@
 define void @rhs_mustexit_3(i16 %n.raw) mustprogress {
 ; CHECK-LABEL: 'rhs_mustexit_3'
 ; CHECK-NEXT:  Determining loop execution counts for: @rhs_mustexit_3
-; CHECK-NEXT:  Loop %for.body: Unpredictable backedge-taken count.
-; CHECK-NEXT:  Loop %for.body: Unpredictable max backedge-taken count.
-; CHECK-NEXT:  Loop %for.body: Unpredictable predicated backedge-taken count.
+; CHECK-NEXT:  Loop %for.body: backedge-taken count is ((-1 + (3 umax (-3 + (zext i8 (trunc i16 %n.raw to i8) to i16))<nsw>)) /u 3)
+; CHECK-NEXT:  Loop %for.body: max backedge-taken count is 21844
+; CHECK-NEXT:  Loop %for.body: Predicated backedge-taken count is ((-1 + (3 umax (-3 + (zext i8 (trunc i16 %n.raw to i8) to i16))<nsw>)) /u 3)
+; CHECK-NEXT:   Predicates:
+; CHECK:       Loop %for.body: Trip multiple is 1
 ;
 entry:
   %n.and = and i16 %n.raw, 255
@@ -279,11 +281,11 @@
 define void @rhs_narrow_range(i16 %n.raw) {
 ; CHECK-LABEL: 'rhs_narrow_range'
 ; CHECK-NEXT:  Determining loop execution counts for: @rhs_narrow_range
-; CHECK-NEXT:  Loop %for.body: Unpredictable backedge-taken count.
-; CHECK-NEXT:  Loop %for.body: Unpredictable max backedge-taken count.
+; CHECK-NEXT:  Loop %for.body: backedge-taken count is (-1 + (1 umax (2 * (zext i7 (trunc i16 (%n.raw /u 2) to i7) to i16))<nuw><nsw>))<nsw>
+; CHECK-NEXT:  Loop %for.body: max backedge-taken count is 253
 ; CHECK-NEXT:  Loop %for.body: Predicated backedge-taken count is (-1 + (1 umax (2 * (zext i7 (trunc i16 (%n.raw /u 2) to i7) to i16))<nuw><nsw>))<nsw>
 ; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:    {1,+,1}<%for.body> Added Flags: <nusw>
+; CHECK:       Loop %for.body: Trip multiple is 1
 ;
 entry:
   %n = and i16 %n.raw, 254