Index: lib/Analysis/ScalarEvolution.cpp
===================================================================
--- lib/Analysis/ScalarEvolution.cpp
+++ lib/Analysis/ScalarEvolution.cpp
@@ -3830,8 +3830,67 @@
 }
 
 
+/// Returns true when the arithmetic (index 0) component of \p CI 's result
+/// is used only on paths control dependent on the computation not
+/// overflowing.  \p CI is expected to be an op.with.overflow intrinsic call.
+static bool IsResultNoOverflow(CallInst *CI, DominatorTree &DT) {
+  BranchInst *Guard = nullptr;
+  SmallVector<Instruction *, 2> Results;
+
+  for (User *U : CI->users()) {
+    // Any user other than an extractvalue consumes the aggregate directly
+    // (storing it to an alloca, say), so don't take chances.
+    auto *Extract = dyn_cast<ExtractValueInst>(U);
+    if (!Extract)
+      return false;
+    assert(Extract->getNumIndices() == 1 && "Obvious from CI's type");
+    if (Extract->getIndices()[0] == 0) {
+      Results.push_back(Extract);
+      continue;
+    }
+
+    // This is the overflow bit; insist that its only user is a branch.
+    assert(Extract->getIndices()[0] == 1 && "Obvious from CI's type");
+    if (!Extract->hasOneUse())
+      return false;
+    Guard = dyn_cast<BranchInst>(Extract->user_back());
+    if (!Guard)
+      return false;
+  }
+
+  if (!Guard)
+    return false;
+
+  // The no-overflow successor must be entered from a single predecessor.
+  BasicBlock *NoWrapBB = Guard->getSuccessor(1);
+  if (std::next(pred_begin(NoWrapBB)) != pred_end(NoWrapBB))
+    return false;
+
+  // Every use of each arithmetic result has to be dominated by NoWrapBB.
+  for (Instruction *Result : Results) {
+    // If the extractvalue itself sits under NoWrapBB then so does every use
+    // of it (domination is transitive), and the per-use scan is unnecessary.
+    if (DT.dominates(NoWrapBB, Result->getParent()))
+      continue;
+    for (Use &ResultUse : Result->uses()) {
+      auto *Consumer = dyn_cast<Instruction>(ResultUse.getUser());
+      if (!Consumer)
+        continue;
+      // A PHI consumes the value on the edge from its incoming block.
+      BasicBlock *UseBB = Consumer->getParent();
+      if (auto *Phi = dyn_cast<PHINode>(Consumer))
+        UseBB = Phi->getIncomingBlock(ResultUse);
+      if (!DT.dominates(NoWrapBB, UseBB))
+        return false;
+    }
+  }
+
+  return true;
+}
+
+
 /// Try to map \p V into a BinaryOp, and return \c None on failure.
-static Optional<BinaryOp> MatchBinaryOp(Value *V) {
+static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
   auto *Op = dyn_cast<Operator>(V);
   if (!Op)
     return None;
@@ -3877,6 +3936,50 @@
     }
     return BinaryOp(Op);
 
+  case Instruction::ExtractValue: {
+    // Only the arithmetic (index 0) component of an op.with.overflow call can
+    // be viewed as a plain binary operation.
+    auto *EVI = cast<ExtractValueInst>(Op);
+    if (EVI->getNumIndices() != 1 || EVI->getIndices()[0] != 0)
+      break;
+
+    auto *CI = dyn_cast<CallInst>(EVI->getAggregateOperand());
+    if (!CI)
+      break;
+
+    if (auto *F = CI->getCalledFunction())
+      switch (F->getIntrinsicID()) {
+      case Intrinsic::sadd_with_overflow:
+      case Intrinsic::uadd_with_overflow: {
+        if (!IsResultNoOverflow(CI, DT))
+          return BinaryOp(Instruction::Add, CI->getArgOperand(0),
+                          CI->getArgOperand(1));
+
+        // All uses of the arithmetic result are guarded by the overflow
+        // check, so the signed variant is NSW and the unsigned one is NUW.
+        bool IsSigned = F->getIntrinsicID() == Intrinsic::sadd_with_overflow;
+        return BinaryOp(Instruction::Add, CI->getArgOperand(0),
+                        CI->getArgOperand(1), /* IsNSW = */ IsSigned,
+                        /* IsNUW = */ !IsSigned);
+      }
+
+      // Subtraction and multiplication are mapped without no-wrap flags.
+      case Intrinsic::ssub_with_overflow:
+      case Intrinsic::usub_with_overflow:
+        return BinaryOp(Instruction::Sub, CI->getArgOperand(0),
+                        CI->getArgOperand(1));
+
+      case Intrinsic::smul_with_overflow:
+      case Intrinsic::umul_with_overflow:
+        return BinaryOp(Instruction::Mul, CI->getArgOperand(0),
+                        CI->getArgOperand(1));
+      default:
+        break;
+      }
+    // Nothing matched; don't fall through into the default label below.
+    break;
+  }
+
   default:
     break;
   }
@@ -3953,7 +4056,7 @@
 
           // If the increment doesn't overflow, then neither the addrec nor
           // the post-increment will overflow.
-          if (auto BO = MatchBinaryOp(BEValueV)) {
+          if (auto BO = MatchBinaryOp(BEValueV, DT)) {
             if (BO->Opcode == Instruction::Add && BO->LHS == PN) {
               if (BO->IsNUW)
                 Flags = setFlags(Flags, SCEV::FlagNUW);
@@ -4833,7 +4936,7 @@
     return getUnknown(V);
 
   Operator *U = cast<Operator>(V);
-  if (auto BO = MatchBinaryOp(U)) {
+  if (auto BO = MatchBinaryOp(U, DT)) {
     switch (BO->Opcode) {
     case Instruction::Add: {
       // The simple thing to do would be to just call getSCEV on both operands
@@ -4874,7 +4977,7 @@
         else
           AddOps.push_back(getSCEV(BO->RHS));
 
-        auto NewBO = MatchBinaryOp(BO->LHS);
+        auto NewBO = MatchBinaryOp(BO->LHS, DT);
         if (!NewBO || (NewBO->Opcode != Instruction::Add &&
                        NewBO->Opcode != Instruction::Sub)) {
           AddOps.push_back(getSCEV(BO->LHS));
@@ -4904,7 +5007,7 @@
         }
 
         MulOps.push_back(getSCEV(BO->RHS));
-        auto NewBO = MatchBinaryOp(BO->LHS);
+        auto NewBO = MatchBinaryOp(BO->LHS, DT);
         if (!NewBO || NewBO->Opcode != Instruction::Mul) {
           MulOps.push_back(getSCEV(BO->LHS));
           break;
Index: test/Analysis/ScalarEvolution/overflow-intrinsics.ll
===================================================================
--- /dev/null
+++ test/Analysis/ScalarEvolution/overflow-intrinsics.ll
@@ -0,0 +1,250 @@
+; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f_sadd_0(i8* %a) {
+; CHECK-LABEL: Classifying expressions for: @f_sadd_0
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %cont
+  ret void
+
+for.body:                                         ; preds = %entry, %cont
+; CHECK:  %i.04 = phi i32 [ 0, %entry ], [ %tmp2, %cont ]
+; CHECK-NEXT:  -->  {0,+,1}<nuw><nsw><%for.body> U: [0,16) S: [0,16)
+; %tmp2 is used only in %cont, the no-overflow successor of the branch on %tmp1 (overflow goes to %trap).
+  %i.04 = phi i32 [ 0, %entry ], [ %tmp2, %cont ]
+  %idxprom = sext i32 %i.04 to i64
+  %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+  store i8 0, i8* %arrayidx, align 1
+  %tmp0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.04, i32 1)
+  %tmp1 = extractvalue { i32, i1 } %tmp0, 1
+  br i1 %tmp1, label %trap, label %cont, !nosanitize !{}
+
+trap:                                             ; preds = %for.body
+  tail call void @llvm.trap() #2, !nosanitize !{}
+  unreachable, !nosanitize !{}
+
+cont:                                             ; preds = %for.body
+  %tmp2 = extractvalue { i32, i1 } %tmp0, 0
+  %cmp = icmp slt i32 %tmp2, 16
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+; CHECK: Loop %for.body: max backedge-taken count is 15
+}
+
+define void @f_sadd_1(i8* %a) {
+; CHECK-LABEL: Classifying expressions for: @f_sadd_1
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %cont
+  ret void
+
+for.body:                                         ; preds = %entry, %cont
+; CHECK:  %i.04 = phi i32 [ 0, %entry ], [ %tmp2, %cont ]
+; CHECK-NEXT:  -->  {0,+,1}<%for.body> U: [0,16) S: [0,16)
+
+; SCEV can prove <nsw> for the above induction variable, but it does not
+; attempt to do so before it sees the sext below, since the fact is not
+; 100% obvious.
+
+  %i.04 = phi i32 [ 0, %entry ], [ %tmp2, %cont ]
+  %idxprom = sext i32 %i.04 to i64
+  %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+  store i8 0, i8* %arrayidx, align 1
+  %tmp0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.04, i32 1)
+  %tmp1 = extractvalue { i32, i1 } %tmp0, 1
+  br i1 %tmp1, label %trap, label %cont, !nosanitize !{}
+
+trap:                                             ; preds = %for.body
+; The overflow path rejoins %cont instead of trapping, so %tmp2 is not guarded by the overflow check.
+  br label %cont
+
+cont:                                             ; preds = %trap, %for.body
+  %tmp2 = extractvalue { i32, i1 } %tmp0, 0
+  %cmp = icmp slt i32 %tmp2, 16
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+; CHECK: Loop %for.body: max backedge-taken count is 15
+}
+
+define void @f_sadd_2(i8* %a, i1* %c) {
+; CHECK-LABEL: Classifying expressions for: @f_sadd_2
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %cont
+  ret void
+
+for.body:                                         ; preds = %entry, %cont
+; CHECK:  %i.04 = phi i32 [ 0, %entry ], [ %tmp2, %cont ]
+; CHECK-NEXT:  -->  {0,+,1}<%for.body>
+; The backedge is controlled by a volatile load of %c rather than by a comparison on the induction variable.
+  %i.04 = phi i32 [ 0, %entry ], [ %tmp2, %cont ]
+  %idxprom = sext i32 %i.04 to i64
+  %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+  store i8 0, i8* %arrayidx, align 1
+  %tmp0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.04, i32 1)
+  %tmp1 = extractvalue { i32, i1 } %tmp0, 1
+  br i1 %tmp1, label %trap, label %cont, !nosanitize !{}
+
+trap:                                             ; preds = %for.body
+; The overflow path rejoins %cont instead of trapping, so %tmp2 is not guarded by the overflow check.
+  br label %cont
+
+cont:                                             ; preds = %trap, %for.body
+  %tmp2 = extractvalue { i32, i1 } %tmp0, 0
+  %cond = load volatile i1, i1* %c
+  br i1 %cond, label %for.body, label %for.cond.cleanup
+}
+
+define void @f_sadd_may_overflow(i8* %a, i1* %c) {
+; CHECK-LABEL: Classifying expressions for: @f_sadd_may_overflow
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %cont
+  ret void
+
+for.body:                                         ; preds = %entry, %cont
+; CHECK:  %i.04 = phi i32 [ 0, %entry ], [ %tmp1, %cont ]
+; CHECK-NEXT:  -->  {0,+,1}<%for.body> U: full-set S: full-set
+; The branch to %trap tests a volatile load (%cond1), not the overflow bit of %tmp0, so the add may overflow.
+  %i.04 = phi i32 [ 0, %entry ], [ %tmp1, %cont ]
+  %idxprom = sext i32 %i.04 to i64
+  %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+  store i8 0, i8* %arrayidx, align 1
+  %tmp0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.04, i32 1)
+  %cond1 = load volatile i1, i1* %c
+  br i1 %cond1, label %trap, label %cont, !nosanitize !{}
+
+trap:                                             ; preds = %for.body
+  tail call void @llvm.trap() #2, !nosanitize !{}
+  unreachable, !nosanitize !{}
+
+cont:                                             ; preds = %for.body
+  %tmp1 = extractvalue { i32, i1 } %tmp0, 0
+  %cond = load volatile i1, i1* %c
+  br i1 %cond, label %for.body, label %for.cond.cleanup
+}
+
+define void @f_uadd(i8* %a) {
+; CHECK-LABEL: Classifying expressions for: @f_uadd
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %cont
+  ret void
+
+for.body:                                         ; preds = %entry, %cont
+; CHECK:  %i.04 = phi i32 [ 0, %entry ], [ %tmp2, %cont ]
+; CHECK-NEXT:  -->  {0,+,1}<nuw><%for.body> U: [0,16) S: [0,16)
+; The uadd.with.overflow result %tmp2 is used only in %cont, the no-overflow successor of the branch on %tmp1.
+  %i.04 = phi i32 [ 0, %entry ], [ %tmp2, %cont ]
+  %idxprom = sext i32 %i.04 to i64
+  %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+  store i8 0, i8* %arrayidx, align 1
+  %tmp0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %i.04, i32 1)
+  %tmp1 = extractvalue { i32, i1 } %tmp0, 1
+  br i1 %tmp1, label %trap, label %cont, !nosanitize !{}
+
+trap:                                             ; preds = %for.body
+  tail call void @llvm.trap(), !nosanitize !{}
+  unreachable, !nosanitize !{}
+
+cont:                                             ; preds = %for.body
+  %tmp2 = extractvalue { i32, i1 } %tmp0, 0
+  %cmp = icmp slt i32 %tmp2, 16
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+; CHECK: Loop %for.body: max backedge-taken count is 15
+}
+
+define void @f_ssub(i8* nocapture %a) {
+; CHECK-LABEL: Classifying expressions for: @f_ssub
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %cont
+  ret void
+
+for.body:                                         ; preds = %entry, %cont
+; CHECK:  %i.04 = phi i32 [ 15, %entry ], [ %tmp2, %cont ]
+; CHECK-NEXT:  -->  {15,+,-1}<%for.body> U: [0,16) S: [0,16)
+; Down-counting loop whose decrement is ssub.with.overflow; the expected add recurrence carries no wrap flags.
+  %i.04 = phi i32 [ 15, %entry ], [ %tmp2, %cont ]
+  %idxprom = sext i32 %i.04 to i64
+  %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+  store i8 0, i8* %arrayidx, align 1
+  %tmp0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %i.04, i32 1)
+  %tmp1 = extractvalue { i32, i1 } %tmp0, 1
+  br i1 %tmp1, label %trap, label %cont, !nosanitize !{}
+
+trap:                                             ; preds = %for.body
+  tail call void @llvm.trap(), !nosanitize !{}
+  unreachable, !nosanitize !{}
+
+cont:                                             ; preds = %for.body
+  %tmp2 = extractvalue { i32, i1 } %tmp0, 0
+  %cmp = icmp sgt i32 %tmp2, -1
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+; CHECK: Loop %for.body: max backedge-taken count is 15
+}
+
+define void @f_usub(i8* nocapture %a) {
+; CHECK-LABEL: Classifying expressions for: @f_usub
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %cont
+  ret void
+
+for.body:                                         ; preds = %entry, %cont
+; CHECK:  %i.04 = phi i32 [ 15, %entry ], [ %tmp2, %cont ]
+; CHECK-NEXT:  -->  {15,+,-1}<%for.body> U: [0,16) S: [0,16)
+; Down-counting loop whose decrement is usub.with.overflow; the expected add recurrence carries no wrap flags.
+  %i.04 = phi i32 [ 15, %entry ], [ %tmp2, %cont ]
+  %idxprom = sext i32 %i.04 to i64
+  %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+  store i8 0, i8* %arrayidx, align 1
+  %tmp0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %i.04, i32 1)
+  %tmp1 = extractvalue { i32, i1 } %tmp0, 1
+  br i1 %tmp1, label %trap, label %cont, !nosanitize !{}
+
+trap:                                             ; preds = %for.body
+  tail call void @llvm.trap(), !nosanitize !{}
+  unreachable, !nosanitize !{}
+
+cont:                                             ; preds = %for.body
+  %tmp2 = extractvalue { i32, i1 } %tmp0, 0
+  %cmp = icmp sgt i32 %tmp2, -1
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+; CHECK: Loop %for.body: max backedge-taken count is 15
+}
+; The arithmetic result of smul.with.overflow is expected to be modeled as a plain multiply of the operands.
+define i32 @f_smul(i32 %val_a, i32 %val_b) {
+; CHECK-LABEL: Classifying expressions for: @f_smul
+  %agg = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %val_a, i32 %val_b)
+; CHECK:   %mul = extractvalue { i32, i1 } %agg, 0
+; CHECK-NEXT:  -->  (%val_a * %val_b) U: full-set S: full-set
+  %mul = extractvalue { i32, i1 } %agg, 0
+  ret i32 %mul
+}
+; The arithmetic result of umul.with.overflow is expected to be modeled as a plain multiply of the operands.
+define i32 @f_umul(i32 %val_a, i32 %val_b) {
+; CHECK-LABEL: Classifying expressions for: @f_umul
+  %agg = tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %val_a, i32 %val_b)
+; CHECK:   %mul = extractvalue { i32, i1 } %agg, 0
+; CHECK-NEXT:  -->  (%val_a * %val_b) U: full-set S: full-set
+  %mul = extractvalue { i32, i1 } %agg, 0
+  ret i32 %mul
+}
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
+
+declare void @llvm.trap() #2