Index: llvm/lib/Target/ARM/ARMISelLowering.h =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.h +++ llvm/lib/Target/ARM/ARMISelLowering.h @@ -85,6 +85,7 @@ FMSTAT, // ARM fmstat instruction. CMOV, // ARM conditional move instructions. + OpaqueSUB, // Subtract that DAG combiner should ignore. SSAT, // Signed saturation USAT, // Unsigned saturation Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1281,6 +1281,7 @@ case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; case ARMISD::CMOV: return "ARMISD::CMOV"; + case ARMISD::OpaqueSUB: return "ARMISD::OpaqueSUB"; case ARMISD::SSAT: return "ARMISD::SSAT"; case ARMISD::USAT: return "ARMISD::USAT"; @@ -12720,21 +12721,21 @@ DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1)); Res = DAG.getNode(ISD::ADDCARRY, dl, VTs, Sub, Neg, Carry); } - } else if (CC == ARMCC::NE && LHS != RHS && + } else if (CC == ARMCC::NE && !isNullConstant(RHS) && (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) { // This seems pointless but will allow us to combine it further below. // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUB x, y), z, !=, (CMPZ x, y) - SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS); + SDValue Sub = DAG.getNode(ARMISD::OpaqueSUB, dl, VT, LHS, RHS); Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc, N->getOperand(3), Cmp); } } else if (isNullConstant(TrueVal)) { - if (CC == ARMCC::EQ && LHS != RHS && + if (CC == ARMCC::EQ && !isNullConstant(RHS) && (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) { // This seems pointless but will allow us to combine it further below // Note that we change == for != as this is the dual for the case above. 
// CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUB x, y), z, !=, (CMPZ x, y) - SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS); + SDValue Sub = DAG.getNode(ARMISD::OpaqueSUB, dl, VT, LHS, RHS); Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal, DAG.getConstant(ARMCC::NE, dl, MVT::i32), N->getOperand(3), Cmp); @@ -12751,8 +12752,8 @@ // t4 = (SUB 1, t2:1) [ we want a carry, not a borrow ] const APInt *TrueConst; if (Subtarget->isThumb1Only() && CC == ARMCC::NE && - (FalseVal.getOpcode() == ISD::SUB) && (FalseVal.getOperand(0) == LHS) && - (FalseVal.getOperand(1) == RHS) && + (FalseVal.getOpcode() == ARMISD::OpaqueSUB) && + (FalseVal.getOperand(0) == LHS) && (FalseVal.getOperand(1) == RHS) && (TrueConst = isPowerOf2Constant(TrueVal))) { SDVTList VTs = DAG.getVTList(VT, MVT::i32); unsigned ShiftAmount = TrueConst->logBase2(); Index: llvm/lib/Target/ARM/ARMInstrInfo.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrInfo.td +++ llvm/lib/Target/ARM/ARMInstrInfo.td @@ -144,6 +144,7 @@ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, [SDNPInGlue]>; +def ARMopaquesub : SDNode<"ARMISD::OpaqueSUB", SDTIntBinOp>; def ARMssatnoshift : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>; @@ -3622,6 +3623,14 @@ defm SUB : AsI1_bin_irs<0b0010, "sub", IIC_iALUi, IIC_iALUr, IIC_iALUsr, sub>; + +def : ARMPat<(ARMopaquesub GPR:$Rn, mod_imm:$imm), (SUBri $Rn, mod_imm:$imm)>; +def : ARMPat<(ARMopaquesub GPR:$Rn, GPR:$Rm), (SUBrr $Rn, $Rm)>; +def : ARMPat<(ARMopaquesub GPR:$Rn, so_reg_imm:$shift), + (SUBrsi $Rn, so_reg_imm:$shift)>; +def : ARMPat<(ARMopaquesub GPR:$Rn, so_reg_reg:$shift), + (SUBrsr $Rn, so_reg_reg:$shift)>; + // ADD and SUB with 's' bit set. 
// // Currently, ADDS/SUBS are pseudo opcodes that exist only in the Index: llvm/lib/Target/ARM/ARMInstrThumb.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrThumb.td +++ llvm/lib/Target/ARM/ARMInstrThumb.td @@ -1282,6 +1282,8 @@ bits<3> imm3; let Inst{8-6} = imm3; } +def : T1Pat<(ARMopaquesub tGPR:$Rn, imm0_7:$imm3), + (tSUBi3 $Rn, imm0_7:$imm3)>; def tSUBi8 : // A8.6.210 T2 T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn), @@ -1289,6 +1291,8 @@ "sub", "\t$Rdn, $imm8", [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>, Sched<[WriteALU]>; +def : T1Pat<(ARMopaquesub tGPR:$Rn, imm0_255:$imm8), + (tSUBi8 $Rn, imm0_255:$imm8)>; def : tInstSubst<"add${s}${p} $rd, $rn, $imm", (tSUBi3 tGPR:$rd, s_cc_out:$s, tGPR:$rn, mod_imm1_7_neg:$imm, pred:$p)>; @@ -1305,6 +1309,7 @@ "sub", "\t$Rd, $Rn, $Rm", [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; +def : T1Pat<(ARMopaquesub tGPR:$Rn, tGPR:$Rm), (tSUBrr $Rn, $Rm)>; def : tInstAlias <"sub${s}${p} $Rdn, $Rm", (tSUBrr tGPR:$Rdn,s_cc_out:$s, tGPR:$Rdn, tGPR:$Rm, pred:$p)>; Index: llvm/lib/Target/ARM/ARMInstrThumb2.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrThumb2.td +++ llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -2081,6 +2081,14 @@ defm t2ADD : T2I_bin_ii12rs<0b000, "add", add, 1>; defm t2SUB : T2I_bin_ii12rs<0b101, "sub", sub>; +def : T2Pat<(ARMopaquesub GPRnopc:$Rn, t2_so_imm:$imm), + (t2SUBri $Rn, t2_so_imm:$imm)>; +def : T2Pat<(ARMopaquesub GPRnopc:$Rn, imm0_4095:$imm), + (t2SUBri12 $Rn, imm0_4095:$imm)>; +def : T2Pat<(ARMopaquesub GPRnopc:$Rn, rGPR:$Rm), (t2SUBrr $Rn, $Rm)>; +def : T2Pat<(ARMopaquesub GPRnopc:$Rn, t2_so_reg:$ShiftedRm), + (t2SUBrs $Rn, t2_so_reg:$ShiftedRm)>; + // ADD and SUB with 's' bit set. No 12-bit immediate (T4) variants. 
// // Currently, t2ADDS/t2SUBS are pseudo opcodes that exist only in the Index: llvm/test/CodeGen/ARM/select.ll =================================================================== --- llvm/test/CodeGen/ARM/select.ll +++ llvm/test/CodeGen/ARM/select.ll @@ -142,3 +142,17 @@ ret float %2 } +; N.B. the sub here is redundant with the cmp; it is fine if a later peephole +; realises this and removes the cmp in favour of a subs. +; CHECK-LABEL: test_overflow_recombine: +define i1 @test_overflow_recombine(i32 %in) { +; CHECK: smull [[LO:r[0-9]+]], [[HI:r[0-9]+]] +; CHECK: sub [[ZERO:r[0-9]+]], [[HI]], [[LO]], asr #31 +; CHECK: cmp [[HI]], [[LO]], asr #31 +; CHECK: movne [[ZERO]], #1 + %prod = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 0, i32 %in) + %overflow = extractvalue { i32, i1 } %prod, 1 + ret i1 %overflow +} + +declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32)