Index: llvm/trunk/include/llvm/IR/PatternMatch.h
===================================================================
--- llvm/trunk/include/llvm/IR/PatternMatch.h
+++ llvm/trunk/include/llvm/IR/PatternMatch.h
@@ -295,6 +295,9 @@
 /// \brief Match a value, capturing it if we match.
 inline bind_ty<Value> m_Value(Value *&V) { return V; }
 
+/// \brief Match an instruction, capturing it if we match.
+inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; }
+
 /// \brief Match a binary operator, capturing it if we match.
 inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; }
 
@@ -1103,6 +1106,52 @@
   return MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>(L, R);
 }
 
+//===----------------------------------------------------------------------===//
+// Matchers for overflow check patterns: e.g. (a + b) u< a
+//
+
+template <typename LHS_t, typename RHS_t, typename Sum_t>
+struct UAddWithOverflow_match {
+  LHS_t L;
+  RHS_t R;
+  Sum_t S;
+
+  UAddWithOverflow_match(const LHS_t &L, const RHS_t &R, const Sum_t &S)
+      : L(L), R(R), S(S) {}
+
+  template <typename OpTy> bool match(OpTy *V) {
+    Value *ICmpLHS, *ICmpRHS;
+    ICmpInst::Predicate Pred;
+    if (!m_ICmp(Pred, m_Value(ICmpLHS), m_Value(ICmpRHS)).match(V))
+      return false;
+
+    Value *AddLHS, *AddRHS;
+    auto AddExpr = m_Add(m_Value(AddLHS), m_Value(AddRHS));
+
+    // (a + b) u< a, (a + b) u< b
+    if (Pred == ICmpInst::ICMP_ULT)
+      if (AddExpr.match(ICmpLHS) && (ICmpRHS == AddLHS || ICmpRHS == AddRHS))
+        return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
+
+    // a >u (a + b), b >u (a + b)
+    if (Pred == ICmpInst::ICMP_UGT)
+      if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS))
+        return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
+
+    return false;
+  }
+};
+
+/// \brief Match an icmp instruction checking for unsigned overflow on addition.
+///
+/// S is matched to the addition whose result is being checked for overflow, and
+/// L and R are matched to the LHS and RHS of S.
+template <typename LHS_t, typename RHS_t, typename Sum_t>
+UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>
+m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S) {
+  return UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>(L, R, S);
+}
+
 /// \brief Match an 'unordered' floating point minimum function.
 /// Floating point has one special value 'NaN'. Therefore, there is no total
 /// order. However, if we can ignore the 'NaN' value (for example, because of a
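
For reference, here is a minimal sketch of how a client uses the new matcher, mirroring its use in CodeGenPrepare below; the helper name isUAddOverflowCheck is hypothetical and not part of the patch:

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Returns true if V is an icmp implementing an unsigned-add overflow check
// in any of the four recognized forms: (a + b) u< a, (a + b) u< b,
// a >u (a + b), or b >u (a + b).
static bool isUAddOverflowCheck(Value *V) {
  Value *A, *B;      // bind to the addition's operands
  Instruction *AddI; // binds to the add instruction itself
  return match(V, m_UAddWithOverflow(m_Value(A), m_Value(B),
                                     m_Instruction(AddI)));
}
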
Index: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
@@ -747,13 +747,60 @@
   return SinkCast(CI);
 }
 
-/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
+/// CombineUAddWithOverflow - Try to combine CI into a call to the
+/// llvm.uadd.with.overflow intrinsic if possible.
+///
+/// Return true if any changes were made.
+static bool CombineUAddWithOverflow(CmpInst *CI) {
+  Value *A, *B;
+  Instruction *AddI;
+  if (!match(CI,
+             m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
+    return false;
+
+  Type *Ty = AddI->getType();
+  if (!isa<IntegerType>(Ty))
+    return false;
+
+  // We don't want to move around uses of condition values this late, so we
+  // check if it is legal to create the call to the intrinsic in the basic
+  // block containing the icmp:
+
+  if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
+    return false;
+
+#ifndef NDEBUG
+  // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
+  // for now:
+  if (AddI->hasOneUse())
+    assert(*AddI->user_begin() == CI && "expected!");
+#endif
+
+  Module *M = CI->getParent()->getParent()->getParent();
+  Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
+
+  auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
+
+  auto *UAddWithOverflow =
+      CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
+  auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
+  auto *Overflow =
+      ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
+
+  CI->replaceAllUsesWith(Overflow);
+  AddI->replaceAllUsesWith(UAdd);
+  CI->eraseFromParent();
+  AddI->eraseFromParent();
+  return true;
+}
+
+/// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce
 /// the number of virtual registers that must be created and coalesced. This is
 /// a clear win except on targets with multiple condition code registers
 /// (PowerPC), where it might lose; some adjustment may be wanted there.
 ///
 /// Return true if any changes are made.
-static bool OptimizeCmpExpression(CmpInst *CI) {
+static bool SinkCmpExpression(CmpInst *CI) {
   BasicBlock *DefBB = CI->getParent();
 
   /// InsertedCmp - Only insert a cmp in each block once.
@@ -802,6 +849,16 @@
   return MadeChange;
 }
 
+static bool OptimizeCmpExpression(CmpInst *CI) {
+  if (SinkCmpExpression(CI))
+    return true;
+
+  if (CombineUAddWithOverflow(CI))
+    return true;
+
+  return false;
+}
+
 /// isExtractBitsCandidateUse - Check if the candidates could
 /// be combined with shift instruction, which includes:
 /// 1. Truncate instruction
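
A condensed sketch of the rewrite CombineUAddWithOverflow performs, assuming A, B, AddI, CI, Ty, and M are bound as in the function above (this restates the patch's code, it is not additional API):

// For an i64 addition, the matched pair
//   %add = add i64 %a, %b
//   %cmp = icmp ult i64 %add, %a
// is replaced by
//   %uadd.overflow = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
//   %uadd = extractvalue { i64, i1 } %uadd.overflow, 0
//   %overflow = extractvalue { i64, i1 } %uadd.overflow, 1
//
// If the add has no other users, everything is created at the icmp, which
// effectively sinks the addition into the compare's block; otherwise the
// call is created where the add was, so %uadd dominates the add's users.
Instruction *InsertPt = AddI->hasOneUse() ? CI : AddI;
Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
auto *Call = CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
auto *Sum = ExtractValueInst::Create(Call, 0, "uadd", InsertPt);
auto *Ov = ExtractValueInst::Create(Call, 1, "overflow", InsertPt);
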
Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2109,35 +2109,6 @@
   return ExtractValueInst::Create(Call, 1, "sadd.overflow");
 }
 
-static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
-                                     InstCombiner &IC) {
-  // Don't bother doing this transformation for pointers, don't do it for
-  // vectors.
-  if (!isa<IntegerType>(OrigAddV->getType())) return nullptr;
-
-  // If the add is a constant expr, then we don't bother transforming it.
-  Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV);
-  if (!OrigAdd) return nullptr;
-
-  Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1);
-
-  // Put the new code above the original add, in case there are any uses of the
-  // add between the add and the compare.
-  InstCombiner::BuilderTy *Builder = IC.Builder;
-  Builder->SetInsertPoint(OrigAdd);
-
-  Module *M = I.getParent()->getParent()->getParent();
-  Type *Ty = LHS->getType();
-  Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
-  CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd");
-  Value *Add = Builder->CreateExtractValue(Call, 0);
-
-  IC.ReplaceInstUsesWith(*OrigAdd, Add);
-
-  // The original icmp gets replaced with the overflow value.
-  return ExtractValueInst::Create(Call, 1, "uadd.overflow");
-}
-
 bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
                                          Value *RHS, Instruction &OrigI,
                                          Value *&Result, Constant *&Overflow) {
@@ -3539,21 +3510,18 @@
     return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);
   }
 
-  // (a+b) <u a  --> llvm.uadd.with.overflow.
-  // (a+b) <u b  --> llvm.uadd.with.overflow.
-  if (I.getPredicate() == ICmpInst::ICMP_ULT &&
-      match(Op0, m_Add(m_Value(A), m_Value(B))) &&
-      (Op1 == A || Op1 == B))
-    if (Instruction *R = ProcessUAddIdiom(I, Op0, *this))
-      return R;
-
-  // a >u (a+b)  --> llvm.uadd.with.overflow.
-  // b >u (a+b)  --> llvm.uadd.with.overflow.
-  if (I.getPredicate() == ICmpInst::ICMP_UGT &&
-      match(Op1, m_Add(m_Value(A), m_Value(B))) &&
-      (Op0 == A || Op0 == B))
-    if (Instruction *R = ProcessUAddIdiom(I, Op1, *this))
-      return R;
+  Instruction *AddI = nullptr;
+  if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B),
+                                   m_Instruction(AddI))) &&
+      isa<IntegerType>(A->getType())) {
+    Value *Result;
+    Constant *Overflow;
+    if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result,
+                              Overflow)) {
+      ReplaceInstUsesWith(*AddI, Result);
+      return ReplaceInstUsesWith(I, Overflow);
+    }
+  }
 
   // (zext a) * (zext b)  --> llvm.umul.with.overflow.
   if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
Index: llvm/trunk/test/CodeGen/X86/add-of-carry.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/add-of-carry.ll
+++ llvm/trunk/test/CodeGen/X86/add-of-carry.ll
@@ -4,43 +4,26 @@
 define i32 @test1(i32 %sum, i32 %x) nounwind readnone ssp {
 entry:
 ; CHECK-LABEL: test1:
-; CHECK: cmpl %ecx, %eax
-; CHECK-NOT: addl
-; CHECK: adcl $0, %eax
-  %add4 = add i32 %x, %sum
-  %cmp = icmp ult i32 %add4, %x
-  %inc = zext i1 %cmp to i32
-  %z.0 = add i32 %add4, %inc
-  ret i32 %z.0
-}
-
-; Instcombine transforms test1 into test2:
-; CHECK-LABEL: test2:
 ; CHECK: movl
 ; CHECK-NEXT: addl
 ; CHECK-NEXT: adcl $0
 ; CHECK-NEXT: ret
-define i32 @test2(i32 %sum, i32 %x) nounwind readnone ssp {
-entry:
-  %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %sum)
-  %0 = extractvalue { i32, i1 } %uadd, 0
-  %cmp = extractvalue { i32, i1 } %uadd, 1
+  %add4 = add i32 %x, %sum
+  %cmp = icmp ult i32 %add4, %x
   %inc = zext i1 %cmp to i32
-  %z.0 = add i32 %0, %inc
+  %z.0 = add i32 %add4, %inc
   ret i32 %z.0
 }
 
 ;
-define i32 @test3(i32 %x, i32 %y, i32 %res) nounwind uwtable readnone ssp {
+define i32 @test2(i32 %x, i32 %y, i32 %res) nounwind uwtable readnone ssp {
 entry:
   %cmp = icmp ugt i32 %x, %y
   %dec = sext i1 %cmp to i32
   %dec.res = add nsw i32 %dec, %res
   ret i32 %dec.res
-; CHECK-LABEL: test3:
+; CHECK-LABEL: test2:
 ; CHECK: cmpl
 ; CHECK: sbbl
 ; CHECK: ret
 }
-
-declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
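
The new CodeGenPrepare tests below all exercise the same source-level idiom. A sketch of the C++ pattern whose lowering typically produces the matched IR (illustrative, not from the patch):

#include <cstdint>

// On unsigned operands, "sum < a" typically compiles to
// "icmp ult (a + b), a", exactly the form m_UAddWithOverflow recognizes;
// after this patch, CodeGenPrepare rewrites it into a single call to
// @llvm.uadd.with.overflow.
uint64_t checked_add(uint64_t a, uint64_t b, bool &overflow) {
  uint64_t sum = a + b;
  overflow = sum < a; // wrapped iff the true sum needs more than 64 bits
  return sum;
}
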
Index: llvm/trunk/test/Transforms/CodeGenPrepare/overflow-intrinsics.ll
===================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/overflow-intrinsics.ll
+++ llvm/trunk/test/Transforms/CodeGenPrepare/overflow-intrinsics.ll
@@ -0,0 +1,74 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; CHECK-LABEL: @test1(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test1(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ult i64 %add, %a
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+; CHECK-LABEL: @test2(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test2(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ult i64 %add, %b
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+; CHECK-LABEL: @test3(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test3(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ugt i64 %b, %add
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+; CHECK-LABEL: @test4(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: extractvalue
+; CHECK: extractvalue
+; CHECK: select
+define i64 @test4(i64 %a, i64 %b, i1 %c) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ugt i64 %b, %add
+  br i1 %c, label %next, label %exit
+
+ next:
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+
+ exit:
+  ret i64 0
+}
+
+; CHECK-LABEL: @test5(
+; CHECK-NOT: llvm.uadd.with.overflow
+; CHECK: next
+define i64 @test5(i64 %a, i64 %b, i64* %ptr, i1 %c) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  store i64 %add, i64* %ptr
+  %cmp = icmp ugt i64 %b, %add
+  br i1 %c, label %next, label %exit
+
+ next:
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+
+ exit:
+  ret i64 0
+}
Index: llvm/trunk/test/Transforms/InstCombine/overflow.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/overflow.ll
+++ llvm/trunk/test/Transforms/InstCombine/overflow.ll
@@ -97,39 +97,6 @@
 ; CHECK: ret i8
 }
 
-; CHECK-LABEL: @test5(
-; CHECK: llvm.uadd.with.overflow
-; CHECK: ret i64
-define i64 @test5(i64 %a, i64 %b) nounwind ssp {
-entry:
-  %add = add i64 %b, %a
-  %cmp = icmp ult i64 %add, %a
-  %Q = select i1 %cmp, i64 %b, i64 42
-  ret i64 %Q
-}
-
-; CHECK-LABEL: @test6(
-; CHECK: llvm.uadd.with.overflow
-; CHECK: ret i64
-define i64 @test6(i64 %a, i64 %b) nounwind ssp {
-entry:
-  %add = add i64 %b, %a
-  %cmp = icmp ult i64 %add, %b
-  %Q = select i1 %cmp, i64 %b, i64 42
-  ret i64 %Q
-}
-
-; CHECK-LABEL: @test7(
-; CHECK: llvm.uadd.with.overflow
-; CHECK: ret i64
-define i64 @test7(i64 %a, i64 %b) nounwind ssp {
-entry:
-  %add = add i64 %b, %a
-  %cmp = icmp ugt i64 %b, %add
-  %Q = select i1 %cmp, i64 %b, i64 42
-  ret i64 %Q
-}
-
 ; CHECK-LABEL: @test8(
 ; PR11438
 ; This is @test1, but the operands are not sign-extended.  Make sure
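
Why @test5 in overflow-intrinsics.ll must stay untransformed (a reasoning sketch restating the guard in CombineUAddWithOverflow, not new code): the store gives the add a second user, and once SinkCmpExpression has sunk the icmp into %next, the add and the compare live in different blocks. Creating the intrinsic at the icmp would leave the store's operand undominated, while creating it at the add would reintroduce exactly the cross-block use of a condition value that the sinking just eliminated, so the transform bails out:

// From CombineUAddWithOverflow above; AddI is the add, CI the icmp.
if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
  return false;
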