Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -8754,8 +8754,33 @@ const SCEVAddRecExpr *LHS, const SCEV *RHS) { const Loop *L = LHS->getLoop(); - return isLoopEntryGuardedByCond(L, Pred, LHS->getStart(), RHS) && - isLoopBackedgeGuardedByCond(L, Pred, LHS->getPostIncExpr(*this), RHS); + // Check if the condition is met on the first iteration. + if (!isLoopEntryGuardedByCond(L, Pred, LHS->getStart(), RHS)) + return false; + + // If the backedge is guarded by condition for post-inc LHS, then we've + // proved that our predicate will be true on every iteration. + if (isLoopBackedgeGuardedByCond(L, Pred, LHS->getPostIncExpr(*this), RHS)) + return true; + + // If the predicate is monotonic, and we've already proved it on the first + // iteration, it is enough to prove it on the last iteration. The tricky part + // is that the monotonicity could be based on a no-wrap flag which was derived + // from a taken side-exit (for example, a guard), and if we actually did the + // number of iterations returned by getBackedgeTakenCount(), we would have + // overflowed. That is why we make an extra check that we don't overflow even + // if we make this many iterations. + bool Increasing = false; + if (isMonotonicPredicate(LHS, Pred, Increasing) && + isProvedNoOverflowOnNormalExit(LHS, ICmpInst::isSigned(Pred))) { + const SCEV *End = LHS->evaluateAtIteration(getBackedgeTakenCount(L), *this); + if (isAvailableAtLoopEntry(End, L) && + isLoopEntryGuardedByCond(L, Pred, End, RHS)) + return true; + } + + // Failed to prove anything, conservatively return false. 
+ return false; } bool ScalarEvolution::isMonotonicPredicate(const SCEVAddRecExpr *LHS, Index: test/Transforms/IndVarSimplify/prove_via_monotonicity.ll =================================================================== --- /dev/null +++ test/Transforms/IndVarSimplify/prove_via_monotonicity.ll @@ -0,0 +1,102 @@ +; RUN: opt -indvars -S < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" +target triple = "x86_64-unknown-linux-gnu" + +declare void @llvm.experimental.guard(i1, ...) + +; Check that we can widen the indvar for the decrementing loop. Phi's range +; doesn't give us enough information, so we have to prove it using monotonicity. + +define void @test_01(i32* %p, i32* %a) { + +; CHECK-LABEL: @test_01( +; CHECK-NOT: trunc + +entry: + %len = load i32, i32* %p, align 4, !range !0 + %len.minus.1 = add nsw i32 %len, -1 + %zero_check = icmp eq i32 %len, 0 + br i1 %zero_check, label %loopexit, label %preheader + +preheader: + br label %loop + +loopexit: + ret void + +loop: + %iv = phi i32 [ %iv.next, %loop ], [ %len.minus.1, %preheader ] + ; CHECK: %indvars.iv = phi i64 + %iv.wide = zext i32 %iv to i64 + %el = getelementptr inbounds i32, i32* %a, i64 %iv.wide + store atomic i32 0, i32* %el unordered, align 4 + %iv.next = add nsw i32 %iv, -1 + ; CHECK: %loopcond = icmp slt i64 %indvars.iv, 1 + %loopcond = icmp slt i32 %iv, 1 + br i1 %loopcond, label %loopexit, label %loop +} + +define void @test_02(i32* %p, i32* %a) { + +; CHECK-LABEL: @test_02( +; CHECK-NOT: trunc + +entry: + %len = load i32, i32* %p, align 4, !range !0 + %len.minus.1 = add nsw i32 %len, -1 + %zero_check = icmp eq i32 %len, 0 + br i1 %zero_check, label %loopexit, label %preheader + +preheader: + br label %loop + +loopexit: + ret void + +loop: + %iv = phi i32 [ %iv.next, %loop ], [ %len.minus.1, %preheader ] + ; CHECK: %indvars.iv = phi i64 + %iv.wide = zext i32 %iv to i64 + %el = getelementptr inbounds i32, i32* %a, i64 %iv.wide + store atomic i32 0, i32* %el 
unordered, align 4 + %iv.next = add nsw i32 %iv, -1 + ; CHECK: %loopcond = icmp ult i64 %indvars.iv, 1 + %loopcond = icmp ult i32 %iv, 1 + br i1 %loopcond, label %loopexit, label %loop +} + +; In this situation the indvar has nsw, but it is derived from the guard. If the +; side exit wasn't present, we would be unable to prove it. Make sure that we do +; not optimize away the guard based on the fact which was derived from this +; very guard. +define void @test_03(i32* %p, i32* %a) { + +; CHECK-LABEL: @test_03 + +entry: + %len = load i32, i32* %p, align 4, !range !0 + %len.minus.1 = add nsw i32 %len, -1 + %zero_check = icmp eq i32 %len, 0 + br i1 %zero_check, label %loopexit, label %preheader + +preheader: + br label %loop + +loopexit: + ret void + +loop: + %iv = phi i32 [ %iv.next, %loop ], [ -2000000000, %preheader ] + %iv.wide = zext i32 %iv to i64 + %el = getelementptr inbounds i32, i32* %a, i64 %iv.wide + %fishy = icmp sgt i32 %iv, -2000000009 + ; CHECK-NOT: call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ] + call void(i1, ...) @llvm.experimental.guard(i1 %fishy) [ "deopt"() ] + store atomic i32 0, i32* %el unordered, align 4 + %iv.next = add nsw i32 %iv, -1 + %loopcond = icmp eq i32 %iv, 2000000000 + br i1 %loopcond, label %loopexit, label %loop +} + +!0 = !{i32 0, i32 2147483647}