Index: llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -6766,9 +6766,28 @@
   // If this is an IV which we could replace the terminating condition, return
   // the final value of the alternative IV on the last iteration.
   auto getAlternateIVEnd = [&](PHINode &PN) -> const SCEV * {
-    // FIXME: This does not properly account for overflow.
     const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
     const SCEV *BECount = SE.getBackedgeTakenCount(L);
+
+    // Check that we can compute the value of AddRec on the exiting iteration
+    // without soundness problems.  There are two cases to be worried about:
+    // 1) BECount could be 255 with type i8.  Simply adding one would be
+    //    incorrect.  We may need one extra bit to represent the unsigned
+    //    trip count.
+    // 2) The multiplication of stride by TC may wrap around.  This is subtle
+    //    because computing the result accounting for wrap is insufficient.
+    //    In order to use the result in an exit test, we must also know that
+    //    AddRec doesn't take the same value on any previous iteration.
+    //    The simplest case to consider is a candidate IV which is narrower
+    //    than the trip count (and thus original IV), but this can also
+    //    happen due to non-unit strides on the candidate IVs.
+    ConstantRange StepCR = SE.getSignedRange(AddRec->getStepRecurrence(SE));
+    ConstantRange BECountCR = SE.getUnsignedRange(BECount);
+    unsigned NoOverflowBitWidth = BECountCR.getActiveBits() + 1 + StepCR.getMinSignedBits();
+    unsigned ARBitWidth = SE.getTypeSizeInBits(AddRec->getType());
+    if (NoOverflowBitWidth > ARBitWidth)
+      return nullptr;
+
     const SCEV *TermValueS = SE.getAddExpr(
         AddRec->getOperand(0),
         SE.getTruncateOrZeroExtend(
Index: llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
===================================================================
--- llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
+++ llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
@@ -192,20 +192,18 @@
 ; In this case, the integer IV has a larger bitwidth than the pointer IV.
 ; This means that the smaller IV may wrap around multiple times before
 ; the original loop exit is taken.
-; FIXME: miscompile
 define void @iv_size(ptr %a, i128 %N) {
 ; CHECK-LABEL: @iv_size(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = trunc i128 [[N:%.*]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 2
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP1]]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[A]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i128 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N:%.*]], [[ENTRY]] ]
 ; CHECK-NEXT:    store i32 1, ptr [[LSR_IV1]], align 4
+; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nsw i128 [[LSR_IV]], -1
 ; CHECK-NEXT:    [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
-; CHECK-NEXT:    [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]]
-; CHECK-NEXT:    br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i128 [[LSR_IV_NEXT]], 0
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
 ;