diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -642,48 +642,84 @@
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
-  // For promoting iN -> iM, this can be expanded by
-  // 1. ANY_EXTEND iN to iM
-  // 2. SHL by M-N
-  // 3. [US][ADD|SUB]SAT
-  // 4. L/ASHR by M-N
+  // Operands are zero-/sign-extended from iN to iM to match the opcode. If the
+  // saturating opcode is legal or custom for the promoted type iM, emit:
+  //   1. SHL both operands by M-N
+  //   2. [US][ADD|SUB]SAT in iM
+  //   3. L/ASHR the result by M-N
+  // Otherwise compute in plain iM arithmetic and saturate explicitly: UMAX+SUB
+  // for USUBSAT, ADD+UMIN for UADDSAT, ADD/SUB+SMIN+SMAX for SADDSAT/SSUBSAT.
   SDLoc dl(N);
   SDValue Op1 = N->getOperand(0);
   SDValue Op2 = N->getOperand(1);
   unsigned OldBits = Op1.getScalarValueSizeInBits();
 
   unsigned Opcode = N->getOpcode();
-  unsigned ShiftOp;
-  switch (Opcode) {
-  case ISD::SADDSAT:
-  case ISD::SSUBSAT:
-    ShiftOp = ISD::SRA;
-    break;
-  case ISD::UADDSAT:
-  case ISD::USUBSAT:
-    ShiftOp = ISD::SRL;
-    break;
-  default:
-    llvm_unreachable("Expected opcode to be signed or unsigned saturation "
-                     "addition or subtraction");
-  }
-
-  SDValue Op1Promoted = GetPromotedInteger(Op1);
-  SDValue Op2Promoted = GetPromotedInteger(Op2);
 
+  SDValue Op1Promoted, Op2Promoted;
+  if (Opcode == ISD::UADDSAT || Opcode == ISD::USUBSAT) {
+    Op1Promoted = ZExtPromotedInteger(Op1);
+    Op2Promoted = ZExtPromotedInteger(Op2);
+  } else {
+    Op1Promoted = SExtPromotedInteger(Op1);
+    Op2Promoted = SExtPromotedInteger(Op2);
+  }
   EVT PromotedType = Op1Promoted.getValueType();
   unsigned NewBits = PromotedType.getScalarSizeInBits();
-  unsigned SHLAmount = NewBits - OldBits;
-  EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
-  SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT);
-  Op1Promoted =
-      DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount);
-  Op2Promoted =
-      DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);
-
-  SDValue Result =
-      DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
-  return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
+
+  if (TLI.isOperationLegalOrCustom(Opcode, PromotedType)) {
+    unsigned ShiftOp;
+    switch (Opcode) {
+    case ISD::SADDSAT:
+    case ISD::SSUBSAT:
+      ShiftOp = ISD::SRA;
+      break;
+    case ISD::UADDSAT:
+    case ISD::USUBSAT:
+      ShiftOp = ISD::SRL;
+      break;
+    default:
+      llvm_unreachable("Expected opcode to be signed or unsigned saturation "
+                       "addition or subtraction");
+    }
+
+    unsigned SHLAmount = NewBits - OldBits;
+    EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
+    SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT);
+    Op1Promoted =
+        DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount);
+    Op2Promoted =
+        DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);
+
+    SDValue Result =
+        DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
+    return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
+  } else {
+    if (Opcode == ISD::USUBSAT) {
+      SDValue Max =
+          DAG.getNode(ISD::UMAX, dl, PromotedType, Op1Promoted, Op2Promoted);
+      return DAG.getNode(ISD::SUB, dl, PromotedType, Max, Op2Promoted);
+    }
+
+    if (Opcode == ISD::UADDSAT) {
+      APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits);
+      SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
+      SDValue Add =
+          DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted);
+      return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax);
+    }
+
+    unsigned AddOp = Opcode == ISD::SADDSAT ? ISD::ADD : ISD::SUB;
+    APInt MinVal = APInt::getSignedMinValue(OldBits).sext(NewBits);
+    APInt MaxVal = APInt::getSignedMaxValue(OldBits).sext(NewBits);
+    SDValue SatMin = DAG.getConstant(MinVal, dl, PromotedType);
+    SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
+    SDValue Result =
+        DAG.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted);
+    Result = DAG.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax);
+    Result = DAG.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin);
+    return Result;
+  }
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
diff --git a/llvm/test/CodeGen/AArch64/sadd_sat.ll b/llvm/test/CodeGen/AArch64/sadd_sat.ll
--- a/llvm/test/CodeGen/AArch64/sadd_sat.ll
+++ b/llvm/test/CodeGen/AArch64/sadd_sat.ll
@@ -39,14 +39,14 @@
 define i16 @func16(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: func16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl w8, w0, #16
-; CHECK-NEXT:    adds w10, w8, w1, lsl #16
-; CHECK-NEXT:    mov w9, #2147483647
-; CHECK-NEXT:    cmp w10, #0 // =0
-; CHECK-NEXT:    cinv w9, w9, ge
-; CHECK-NEXT:    adds w8, w8, w1, lsl #16
-; CHECK-NEXT:    csel w8, w9, w8, vs
-; CHECK-NEXT:    asr w0, w8, #16
+; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    mov w9, #32767
+; CHECK-NEXT:    add w8, w8, w1, sxth
+; CHECK-NEXT:    cmp w8, w9
+; CHECK-NEXT:    csel w8, w8, w9, lt
+; CHECK-NEXT:    cmn w8, #8, lsl #12 // =32768
+; CHECK-NEXT:    mov w9, #-32768
+; CHECK-NEXT:    csel w0, w8, w9, gt
 ; CHECK-NEXT:    ret
   %tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %y);
   ret i16 %tmp;
@@ -55,14 +55,14 @@
 define i8 @func8(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: func8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl w8, w0, #24
-; CHECK-NEXT:    adds w10, w8, w1, lsl #24
-; CHECK-NEXT:    mov w9, #2147483647
-; CHECK-NEXT:    cmp w10, #0 // =0
-; CHECK-NEXT:    cinv w9, w9, ge
-; CHECK-NEXT:    adds w8, w8, w1, lsl #24
-; CHECK-NEXT:    csel w8, w9, w8, vs
-; CHECK-NEXT:    asr w0, w8, #24
+; CHECK-NEXT:    sxtb w8, w0
+; CHECK-NEXT:    add w8, w8, w1, sxtb
+; CHECK-NEXT:    mov w9, #127
+; CHECK-NEXT:    cmp w8, #127 // =127
+; CHECK-NEXT:    csel w8, w8, w9, lt
+; CHECK-NEXT:    cmn w8, #128 // =128
+; CHECK-NEXT:    mov w9, #-128
+; CHECK-NEXT:    csel w0, w8, w9, gt
 ; CHECK-NEXT:    ret
   %tmp = call i8 @llvm.sadd.sat.i8(i8 %x, i8 %y);
   ret i8 %tmp;
@@ -71,14 +71,15 @@
 define i4 @func3(i4 %x, i4 %y) nounwind {
 ; CHECK-LABEL: func3:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl w8, w0, #28
-; CHECK-NEXT:    adds w10, w8, w1, lsl #28
-; CHECK-NEXT:    mov w9, #2147483647
-; CHECK-NEXT:    cmp w10, #0 // =0
-; CHECK-NEXT:    cinv w9, w9, ge
-; CHECK-NEXT:    adds w8, w8, w1, lsl #28
-; CHECK-NEXT:    csel w8, w9, w8, vs
-; CHECK-NEXT:    asr w0, w8, #28
+; CHECK-NEXT:    lsl w8, w1, #28
+; CHECK-NEXT:    sbfx w9, w0, #0, #4
+; CHECK-NEXT:    add w8, w9, w8, asr #28
+; CHECK-NEXT:    mov w10, #7
+; CHECK-NEXT:    cmp w8, #7 // =7
+; CHECK-NEXT:    csel w8, w8, w10, lt
+; CHECK-NEXT:    cmn w8, #8 // =8
+; CHECK-NEXT:    mov w9, #-8
+; CHECK-NEXT:    csel w0, w8, w9, gt
 ; CHECK-NEXT:    ret
   %tmp = call i4 @llvm.sadd.sat.i4(i4 %x, i4 %y);
   ret i4 %tmp;
diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll b/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll
--- a/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll
+++ b/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll
@@ -41,15 +41,15 @@
 define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind {
 ; CHECK-LABEL: func16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mul w8, w1, w2
-; CHECK-NEXT:    lsl w9, w0, #16
-; CHECK-NEXT:    adds w11, w9, w8, lsl #16
-; CHECK-NEXT:    mov w10, #2147483647
-; CHECK-NEXT:    cmp w11, #0 // =0
-; CHECK-NEXT:    cinv w10, w10, ge
-; CHECK-NEXT:    adds w8, w9, w8, lsl #16
-; CHECK-NEXT:    csel w8, w10, w8, vs
-; CHECK-NEXT:    asr w0, w8, #16
+; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    mul w9, w1, w2
+; CHECK-NEXT:    mov w10, #32767
+; CHECK-NEXT:    add w8, w8, w9, sxth
+; CHECK-NEXT:    cmp w8, w10
+; CHECK-NEXT:    csel w8, w8, w10, lt
+; CHECK-NEXT:    cmn w8, #8, lsl #12 // =32768
+; CHECK-NEXT:    mov w9, #-32768
+; CHECK-NEXT:    csel w0, w8, w9, gt
 ; CHECK-NEXT:    ret
   %a = mul i16 %y, %z
   %tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %a)
@@ -59,15 +59,15 @@
 define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind {
 ; CHECK-LABEL: func8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mul w8, w1, w2
-; CHECK-NEXT:    lsl w9, w0, #24
-; CHECK-NEXT:    adds w11, w9, w8, lsl #24
-; CHECK-NEXT:    mov w10, #2147483647
-; CHECK-NEXT:    cmp w11, #0 // =0
-; CHECK-NEXT:    cinv w10, w10, ge
-; CHECK-NEXT:    adds w8, w9, w8, lsl #24
-; CHECK-NEXT:    csel w8, w10, w8, vs
-; CHECK-NEXT:    asr w0, w8, #24
+; CHECK-NEXT:    sxtb w8, w0
+; CHECK-NEXT:    mul w9, w1, w2
+; CHECK-NEXT:    add w8, w8, w9, sxtb
+; CHECK-NEXT:    mov w10, #127
+; CHECK-NEXT:    cmp w8, #127 // =127
+; CHECK-NEXT:    csel w8, w8, w10, lt
+; CHECK-NEXT:    cmn w8, #128 // =128
+; CHECK-NEXT:    mov w9, #-128
+; CHECK-NEXT:    csel w0, w8, w9, gt
 ; CHECK-NEXT:    ret
   %a = mul i8 %y, %z
   %tmp = call i8 @llvm.sadd.sat.i8(i8 %x, i8 %a)
@@ -77,15 +77,16 @@
 define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind {
 ; CHECK-LABEL: func4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mul w8, w1, w2
-; CHECK-NEXT:    lsl w9, w0, #28
-; CHECK-NEXT:    adds w11, w9, w8, lsl #28
-; CHECK-NEXT:    mov w10, #2147483647
-; CHECK-NEXT:    cmp w11, #0 // =0
-; CHECK-NEXT:    cinv w10, w10, ge
-; CHECK-NEXT:    adds w8, w9, w8, lsl #28
-; CHECK-NEXT:    csel w8, w10, w8, vs
-; CHECK-NEXT:    asr w0, w8, #28
+; CHECK-NEXT:    mul w9, w1, w2
+; CHECK-NEXT:    sbfx w8, w0, #0, #4
+; CHECK-NEXT:    lsl w9, w9, #28
+; CHECK-NEXT:    add w8, w8, w9, asr #28
+; CHECK-NEXT:    mov w10, #7
+; CHECK-NEXT:    cmp w8, #7 // =7
+; CHECK-NEXT:    csel w8, w8, w10, lt
+; CHECK-NEXT:    cmn w8, #8 // =8
+; CHECK-NEXT:    mov w9, #-8
+; CHECK-NEXT:    csel w0, w8, w9, gt
 ; CHECK-NEXT:    ret
   %a = mul i4 %y, %z
   %tmp = call i4 @llvm.sadd.sat.i4(i4 %x, i4 %a)
diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
--- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
@@ -232,34 +232,27 @@
 define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
 ; CHECK-LABEL: v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldrb w8, [x0]
-; CHECK-NEXT:    ldrb w9, [x1]
-; CHECK-NEXT:    ldrb w10, [x0, #1]
-; CHECK-NEXT:    ldrb w11, [x1, #1]
-; CHECK-NEXT:    ldrb w12, [x0, #2]
+; CHECK-NEXT:    ldrsb w8, [x0]
+; CHECK-NEXT:    ldrsb w9, [x1]
+; CHECK-NEXT:    ldrsb w10, [x0, #1]
+; CHECK-NEXT:    ldrsb w11, [x1, #1]
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    ldrb w8, [x1, #2]
 ; CHECK-NEXT:    fmov s1, w9
+; CHECK-NEXT:    ldrsb w8, [x0, #2]
+; CHECK-NEXT:    ldrsb w9, [x1, #2]
 ; CHECK-NEXT:    mov v0.h[1], w10
-; CHECK-NEXT:    ldrb w9, [x0, #3]
-; CHECK-NEXT:    ldrb w10, [x1, #3]
 ; CHECK-NEXT:    mov v1.h[1], w11
-; CHECK-NEXT:    mov v0.h[2], w12
-; CHECK-NEXT:    mov v1.h[2], w8
-; CHECK-NEXT:    mov v0.h[3], w9
-; CHECK-NEXT:    mov v1.h[3], w10
-; CHECK-NEXT:    shl v1.4h, v1.4h, #8
-; CHECK-NEXT:    shl v0.4h, v0.4h, #8
-; CHECK-NEXT:    add v3.4h, v0.4h, v1.4h
-; CHECK-NEXT:    cmlt v4.4h, v3.4h, #0
-; CHECK-NEXT:    mvni v2.4h, #128, lsl #8
-; CHECK-NEXT:    cmlt v1.4h, v1.4h, #0
-; CHECK-NEXT:    cmgt v0.4h, v0.4h, v3.4h
-; CHECK-NEXT:    mvn v5.8b, v4.8b
-; CHECK-NEXT:    bsl v2.8b, v4.8b, v5.8b
-; CHECK-NEXT:    eor v0.8b, v1.8b, v0.8b
-; CHECK-NEXT:    bsl v0.8b, v2.8b, v3.8b
-; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
+; CHECK-NEXT:    ldrsb w10, [x0, #3]
+; CHECK-NEXT:    ldrsb w11, [x1, #3]
+; CHECK-NEXT:    mov v0.h[2], w8
+; CHECK-NEXT:    mov v1.h[2], w9
+; CHECK-NEXT:    mov v0.h[3], w10
+; CHECK-NEXT:    mov v1.h[3], w11
+; CHECK-NEXT:    add v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    movi v1.4h, #127
+; CHECK-NEXT:    smin v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    mvni v1.4h, #127
+; CHECK-NEXT:    smax v0.4h, v0.4h, v1.4h
 ; CHECK-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-NEXT:    str s0, [x2]
 ; CHECK-NEXT:    ret
@@ -273,26 +266,19 @@
 define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind {
 ; CHECK-LABEL: v2i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldrb w8, [x0]
-; CHECK-NEXT:    ldrb w9, [x1]
-; CHECK-NEXT:    ldrb w10, [x0, #1]
-; CHECK-NEXT:    ldrb w11, [x1, #1]
+; CHECK-NEXT:    ldrsb w8, [x0]
+; CHECK-NEXT:    ldrsb w9, [x1]
+; CHECK-NEXT:    ldrsb w10, [x0, #1]
+; CHECK-NEXT:    ldrsb w11, [x1, #1]
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    fmov s2, w9
+; CHECK-NEXT:    fmov s1, w9
 ; CHECK-NEXT:    mov v0.s[1], w10
-; CHECK-NEXT:    mov v2.s[1], w11
-; CHECK-NEXT:    shl v2.2s, v2.2s, #24
-; CHECK-NEXT:    shl v0.2s, v0.2s, #24
-; CHECK-NEXT:    add v3.2s, v0.2s, v2.2s
-; CHECK-NEXT:    cmlt v4.2s, v3.2s, #0
-; CHECK-NEXT:    mvni v1.2s, #128, lsl #24
-; CHECK-NEXT:    cmlt v2.2s, v2.2s, #0
-; CHECK-NEXT:    cmgt v0.2s, v0.2s, v3.2s
-; CHECK-NEXT:    mvn v5.8b, v4.8b
-; CHECK-NEXT:    eor v0.8b, v2.8b, v0.8b
-; CHECK-NEXT:    bsl v1.8b, v4.8b, v5.8b
-; CHECK-NEXT:    bsl v0.8b, v1.8b, v3.8b
-; CHECK-NEXT:    ushr v0.2s, v0.2s, #24
+; CHECK-NEXT:    mov v1.s[1], w11
+; CHECK-NEXT:    add v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    movi v1.2s, #127
+; CHECK-NEXT:    smin v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    mvni v1.2s, #127
+; CHECK-NEXT:    smax v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    mov w8, v0.s[1]
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    strb w8, [x2, #1]
@@ -331,26 +317,19 @@
 define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind {
 ; CHECK-LABEL: v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldrh w8, [x0]
-; CHECK-NEXT:    ldrh w9, [x1]
-; CHECK-NEXT:    ldrh w10, [x0, #2]
-; CHECK-NEXT:    ldrh w11, [x1, #2]
+; CHECK-NEXT:    ldrsh w8, [x0]
+; CHECK-NEXT:    ldrsh w9, [x1]
+; CHECK-NEXT:    ldrsh w10, [x0, #2]
+; CHECK-NEXT:    ldrsh w11, [x1, #2]
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    fmov s2, w9
+; CHECK-NEXT:    fmov s1, w9
 ; CHECK-NEXT:    mov v0.s[1], w10
-; CHECK-NEXT:    mov v2.s[1], w11
-; CHECK-NEXT:    shl v2.2s, v2.2s, #16
-; CHECK-NEXT:    shl v0.2s, v0.2s, #16
-; CHECK-NEXT:    add v3.2s, v0.2s, v2.2s
-; CHECK-NEXT:    cmlt v4.2s, v3.2s, #0
-; CHECK-NEXT:    mvni v1.2s, #128, lsl #24
-; CHECK-NEXT:    cmlt v2.2s, v2.2s, #0
-; CHECK-NEXT:    cmgt v0.2s, v0.2s, v3.2s
-; CHECK-NEXT:    mvn v5.8b, v4.8b
-; CHECK-NEXT:    eor v0.8b, v2.8b, v0.8b
-; CHECK-NEXT:    bsl v1.8b, v4.8b, v5.8b
-; CHECK-NEXT:    bsl v0.8b, v1.8b, v3.8b
-; CHECK-NEXT:    ushr v0.2s, v0.2s, #16
+; CHECK-NEXT:    mov v1.s[1], w11
+; CHECK-NEXT:    add v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    movi v1.2s, #127, msl #8
+; CHECK-NEXT:    smin v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    mvni v1.2s, #127, msl #8
+; CHECK-NEXT:    smax v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    mov w8, v0.s[1]
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    strh w8, [x2, #2]
@@ -462,18 +441,14 @@
 define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
 ; CHECK-LABEL: v16i4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    shl v1.16b, v1.16b, #4
 ; CHECK-NEXT:    shl v0.16b, v0.16b, #4
-; CHECK-NEXT:    add v3.16b, v0.16b, v1.16b
-; CHECK-NEXT:    cmlt v4.16b, v3.16b, #0
-; CHECK-NEXT:    movi v2.16b, #127
-; CHECK-NEXT:    cmlt v1.16b, v1.16b, #0
-; CHECK-NEXT:    cmgt v0.16b, v0.16b, v3.16b
-; CHECK-NEXT:    mvn v5.16b, v4.16b
-; CHECK-NEXT:    bsl v2.16b, v4.16b, v5.16b
-; CHECK-NEXT:    eor v0.16b, v1.16b, v0.16b
-; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
+; CHECK-NEXT:    shl v1.16b, v1.16b, #4
 ; CHECK-NEXT:    sshr v0.16b, v0.16b, #4
+; CHECK-NEXT:    movi v2.16b, #7
+; CHECK-NEXT:    ssra v0.16b, v1.16b, #4
+; CHECK-NEXT:    smin v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    movi v1.16b, #248
+; CHECK-NEXT:    smax v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %z = call <16 x i4> @llvm.sadd.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
   ret <16 x i4> %z
@@ -482,18 +457,14 @@
 define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
 ; CHECK-LABEL: v16i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    shl v1.16b, v1.16b, #7
 ; CHECK-NEXT:    shl v0.16b, v0.16b, #7
-; CHECK-NEXT:    add v3.16b, v0.16b, v1.16b
-; CHECK-NEXT:    cmlt v4.16b, v3.16b, #0
-; CHECK-NEXT:    movi v2.16b, #127
-; CHECK-NEXT:    cmlt v1.16b, v1.16b, #0
-; CHECK-NEXT:    cmgt v0.16b, v0.16b, v3.16b
-; CHECK-NEXT:    mvn v5.16b, v4.16b
-; CHECK-NEXT:    bsl v2.16b, v4.16b, v5.16b
-; CHECK-NEXT:    eor v0.16b, v1.16b, v0.16b
-; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
+; CHECK-NEXT:    shl v1.16b, v1.16b, #7
 ; CHECK-NEXT:    sshr v0.16b, v0.16b, #7
+; CHECK-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-NEXT:    ssra v0.16b, v1.16b, #7
+; CHECK-NEXT:    smin v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    movi v1.2d, #0xffffffffffffffff
+; CHECK-NEXT:    smax v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %z = call <16 x i1> @llvm.sadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
   ret <16 x i1> %z
diff --git a/llvm/test/CodeGen/AArch64/ssub_sat.ll b/llvm/test/CodeGen/AArch64/ssub_sat.ll
--- a/llvm/test/CodeGen/AArch64/ssub_sat.ll
+++ b/llvm/test/CodeGen/AArch64/ssub_sat.ll
@@ -39,14 +39,14 @@
 define i16 @func16(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: func16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl w8, w0, #16
-; CHECK-NEXT:    subs w10, w8, w1, lsl #16
-; CHECK-NEXT:    mov w9, #2147483647
-; CHECK-NEXT:    cmp w10, #0 // =0
-; CHECK-NEXT:    cinv w9, w9, ge
-; CHECK-NEXT:    subs w8, w8, w1, lsl #16
-; CHECK-NEXT:    csel w8, w9, w8, vs
-; CHECK-NEXT:    asr w0, w8, #16
+; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    mov w9, #32767
+; CHECK-NEXT:    sub w8, w8, w1, sxth
+; CHECK-NEXT:    cmp w8, w9
+; CHECK-NEXT:    csel w8, w8, w9, lt
+; CHECK-NEXT:    cmn w8, #8, lsl #12 // =32768
+; CHECK-NEXT:    mov w9, #-32768
+; CHECK-NEXT:    csel w0, w8, w9, gt
 ; CHECK-NEXT:    ret
   %tmp = call i16 @llvm.ssub.sat.i16(i16 %x, i16 %y);
   ret i16 %tmp;
@@ -55,14 +55,14 @@
 define i8 @func8(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: func8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl w8, w0, #24
-; CHECK-NEXT:    subs w10, w8, w1, lsl #24
-; CHECK-NEXT:    mov w9, #2147483647
-; CHECK-NEXT:    cmp w10, #0 // =0
-; CHECK-NEXT:    cinv w9, w9, ge
-; CHECK-NEXT:    subs w8, w8, w1, lsl #24
-; CHECK-NEXT:    csel w8, w9, w8, vs
-; CHECK-NEXT:    asr w0, w8, #24
+; CHECK-NEXT:    sxtb w8, w0
+; CHECK-NEXT:    sub w8, w8, w1, sxtb
+; CHECK-NEXT:    mov w9, #127
+; CHECK-NEXT:    cmp w8, #127 // =127
+; CHECK-NEXT:    csel w8, w8, w9, lt
+; CHECK-NEXT:    cmn w8, #128 // =128
+; CHECK-NEXT:    mov w9, #-128
+; CHECK-NEXT:    csel w0, w8, w9, gt
 ; CHECK-NEXT:    ret
   %tmp = call i8 @llvm.ssub.sat.i8(i8 %x, i8 %y);
   ret i8 %tmp;
@@ -71,14 +71,15 @@
 define i4 @func3(i4 %x, i4 %y) nounwind {
 ; CHECK-LABEL: func3:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl w8, w0, #28
-; CHECK-NEXT:    subs w10, w8, w1, lsl #28
-; CHECK-NEXT:    mov w9, #2147483647
-; CHECK-NEXT:    cmp w10, #0 // =0
-; CHECK-NEXT:    cinv w9, w9, ge
-; CHECK-NEXT:    subs w8, w8, w1, lsl #28
-; CHECK-NEXT:    csel w8, w9, w8, vs
-; CHECK-NEXT:    asr w0, w8, #28
+; CHECK-NEXT:    lsl w8, w1, #28
+; CHECK-NEXT:    sbfx w9, w0, #0, #4
+; CHECK-NEXT:    sub w8, w9, w8, asr #28
+; CHECK-NEXT:    mov w10, #7
+; CHECK-NEXT:    cmp w8, #7 // =7
+; CHECK-NEXT:    csel w8, w8, w10, lt
+; CHECK-NEXT:    cmn w8, #8 // =8
+; CHECK-NEXT:    mov w9, #-8
+; CHECK-NEXT:    csel w0, w8, w9, gt
 ; CHECK-NEXT:    ret
   %tmp = call i4 @llvm.ssub.sat.i4(i4 %x, i4 %y);
   ret i4 %tmp;
diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll b/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll
--- a/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll
+++ b/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll
@@ -41,15 +41,15 @@
 define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind {
 ; CHECK-LABEL: func16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mul w8, w1, w2
-; CHECK-NEXT:    lsl w9, w0, #16
-; CHECK-NEXT:    subs w11, w9, w8, lsl #16
-; CHECK-NEXT:    mov w10, #2147483647
-; CHECK-NEXT:    cmp w11, #0 // =0
-; CHECK-NEXT:    cinv w10, w10, ge
-; CHECK-NEXT:    subs w8, w9, w8, lsl #16
-; CHECK-NEXT:    csel w8, w10, w8, vs
-; CHECK-NEXT:    asr w0, w8, #16
+; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    mul w9, w1, w2
+; CHECK-NEXT:    mov w10, #32767
+; CHECK-NEXT:    sub w8, w8, w9, sxth
+; CHECK-NEXT:    cmp w8, w10
+; CHECK-NEXT:    csel w8, w8, w10, lt
+; CHECK-NEXT:    cmn w8, #8, lsl #12 // =32768
+; CHECK-NEXT:    mov w9, #-32768
+; CHECK-NEXT:    csel w0, w8, w9, gt
 ; CHECK-NEXT:    ret
   %a = mul i16 %y, %z
   %tmp = call i16 @llvm.ssub.sat.i16(i16 %x, i16 %a)
@@ -59,15 +59,15 @@
 define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind {
 ; CHECK-LABEL: func8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mul w8, w1, w2
-; CHECK-NEXT:    lsl w9, w0, #24
-; CHECK-NEXT:    subs w11, w9, w8, lsl #24
-; CHECK-NEXT:    mov w10, #2147483647
-; CHECK-NEXT:    cmp w11, #0 // =0
-; CHECK-NEXT:    cinv w10, w10, ge
-; CHECK-NEXT:    subs w8, w9, w8, lsl #24
-; CHECK-NEXT:    csel w8, w10, w8, vs
-; CHECK-NEXT:    asr w0, w8, #24
+; CHECK-NEXT:    sxtb w8, w0
+; CHECK-NEXT:    mul w9, w1, w2
+; CHECK-NEXT:    sub w8, w8, w9, sxtb
+; CHECK-NEXT:    mov w10, #127
+; CHECK-NEXT:    cmp w8, #127 // =127
+; CHECK-NEXT:    csel w8, w8, w10, lt
+; CHECK-NEXT:    cmn w8, #128 // =128
+; CHECK-NEXT:    mov w9, #-128
+; CHECK-NEXT:    csel w0, w8, w9, gt
 ; CHECK-NEXT:    ret
   %a = mul i8 %y, %z
   %tmp = call i8 @llvm.ssub.sat.i8(i8 %x, i8 %a)
@@ -77,15 +77,16 @@
 define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind {
 ; CHECK-LABEL: func4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mul w8, w1, w2
-; CHECK-NEXT:    lsl w9, w0, #28
-; CHECK-NEXT:    subs w11, w9, w8, lsl #28
-; CHECK-NEXT:    mov w10, #2147483647
-; CHECK-NEXT:    cmp w11, #0 // =0
-; CHECK-NEXT:    cinv w10, w10, ge
-; CHECK-NEXT:    subs w8, w9, w8, lsl #28
-; CHECK-NEXT:    csel w8, w10, w8, vs
-; CHECK-NEXT:    asr w0, w8, #28
+; CHECK-NEXT:    mul w9, w1, w2
+; CHECK-NEXT:    sbfx w8, w0, #0, #4
+; CHECK-NEXT:    lsl w9, w9, #28
+; CHECK-NEXT:    sub w8, w8, w9, asr #28
+; CHECK-NEXT:    mov w10, #7
+; CHECK-NEXT:    cmp w8, #7 // =7
+; CHECK-NEXT:    csel w8, w8, w10, lt
+; CHECK-NEXT:    cmn w8, #8 // =8
+; CHECK-NEXT:    mov w9, #-8
+; CHECK-NEXT:    csel w0, w8, w9, gt
 ; CHECK-NEXT:    ret
   %a = mul i4 %y, %z
   %tmp = call i4 @llvm.ssub.sat.i4(i4 %x, i4 %a)
diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
--- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
@@ -233,34 +233,27 @@
 define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
 ; CHECK-LABEL: v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldrb w8, [x0]
-; CHECK-NEXT:    ldrb w9, [x1]
-; CHECK-NEXT:    ldrb w10, [x0, #1]
-; CHECK-NEXT:    ldrb w11, [x1, #1]
-; CHECK-NEXT:    ldrb w12, [x0, #2]
+; CHECK-NEXT:    ldrsb w8, [x0]
+; CHECK-NEXT:    ldrsb w9, [x1]
+; CHECK-NEXT:    ldrsb w10, [x0, #1]
+; CHECK-NEXT:    ldrsb w11, [x1, #1]
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    ldrb w8, [x1, #2]
 ; CHECK-NEXT:    fmov s1, w9
+; CHECK-NEXT:    ldrsb w8, [x0, #2]
+; CHECK-NEXT:    ldrsb w9, [x1, #2]
 ; CHECK-NEXT:    mov v0.h[1], w10
-; CHECK-NEXT:    ldrb w9, [x0, #3]
-; CHECK-NEXT:    ldrb w10, [x1, #3]
 ; CHECK-NEXT:    mov v1.h[1], w11
-; CHECK-NEXT:    mov v0.h[2], w12
-; CHECK-NEXT:    mov v1.h[2], w8
-; CHECK-NEXT:    mov v0.h[3], w9
-; CHECK-NEXT:    mov v1.h[3], w10
-; CHECK-NEXT:    shl v1.4h, v1.4h, #8
-; CHECK-NEXT:    shl v0.4h, v0.4h, #8
-; CHECK-NEXT:    sub v3.4h, v0.4h, v1.4h
-; CHECK-NEXT:    cmlt v4.4h, v3.4h, #0
-; CHECK-NEXT:    mvni v2.4h, #128, lsl #8
-; CHECK-NEXT:    cmgt v1.4h, v1.4h, #0
-; CHECK-NEXT:    cmgt v0.4h, v0.4h, v3.4h
-; CHECK-NEXT:    mvn v5.8b, v4.8b
-; CHECK-NEXT:    bsl v2.8b, v4.8b, v5.8b
-; CHECK-NEXT:    eor v0.8b, v1.8b, v0.8b
-; CHECK-NEXT:    bsl v0.8b, v2.8b, v3.8b
-; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
+; CHECK-NEXT:    ldrsb w10, [x0, #3]
+; CHECK-NEXT:    ldrsb w11, [x1, #3]
+; CHECK-NEXT:    mov v0.h[2], w8
+; CHECK-NEXT:    mov v1.h[2], w9
+; CHECK-NEXT:    mov v0.h[3], w10
+; CHECK-NEXT:    mov v1.h[3], w11
+; CHECK-NEXT:    sub v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    movi v1.4h, #127
+; CHECK-NEXT:    smin v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    mvni v1.4h, #127
+; CHECK-NEXT:    smax v0.4h, v0.4h, v1.4h
 ; CHECK-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-NEXT:    str s0, [x2]
 ; CHECK-NEXT:    ret
@@ -274,26 +267,19 @@
 define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind {
 ; CHECK-LABEL: v2i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldrb w8, [x0]
-; CHECK-NEXT:    ldrb w9, [x1]
-; CHECK-NEXT:    ldrb w10, [x0, #1]
-; CHECK-NEXT:    ldrb w11, [x1, #1]
+; CHECK-NEXT:    ldrsb w8, [x0]
+; CHECK-NEXT:    ldrsb w9, [x1]
+; CHECK-NEXT:    ldrsb w10, [x0, #1]
+; CHECK-NEXT:    ldrsb w11, [x1, #1]
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    fmov s2, w9
+; CHECK-NEXT:    fmov s1, w9
 ; CHECK-NEXT:    mov v0.s[1], w10
-; CHECK-NEXT:    mov v2.s[1], w11
-; CHECK-NEXT:    shl v2.2s, v2.2s, #24
-; CHECK-NEXT:    shl v0.2s, v0.2s, #24
-; CHECK-NEXT:    sub v3.2s, v0.2s, v2.2s
-; CHECK-NEXT:    cmlt v4.2s, v3.2s, #0
-; CHECK-NEXT:    mvni v1.2s, #128, lsl #24
-; CHECK-NEXT:    cmgt v2.2s, v2.2s, #0
-; CHECK-NEXT:    cmgt v0.2s, v0.2s, v3.2s
-; CHECK-NEXT:    mvn v5.8b, v4.8b
-; CHECK-NEXT:    eor v0.8b, v2.8b, v0.8b
-; CHECK-NEXT:    bsl v1.8b, v4.8b, v5.8b
-; CHECK-NEXT:    bsl v0.8b, v1.8b, v3.8b
-; CHECK-NEXT:    ushr v0.2s, v0.2s, #24
+; CHECK-NEXT:    mov v1.s[1], w11
+; CHECK-NEXT:    sub v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    movi v1.2s, #127
+; CHECK-NEXT:    smin v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    mvni v1.2s, #127
+; CHECK-NEXT:    smax v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    mov w8, v0.s[1]
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    strb w8, [x2, #1]
@@ -332,26 +318,19 @@
 define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind {
 ; CHECK-LABEL: v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldrh w8, [x0]
-; CHECK-NEXT:    ldrh w9, [x1]
-; CHECK-NEXT:    ldrh w10, [x0, #2]
-; CHECK-NEXT:    ldrh w11, [x1, #2]
+; CHECK-NEXT:    ldrsh w8, [x0]
+; CHECK-NEXT:    ldrsh w9, [x1]
+; CHECK-NEXT:    ldrsh w10, [x0, #2]
+; CHECK-NEXT:    ldrsh w11, [x1, #2]
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    fmov s2, w9
+; CHECK-NEXT:    fmov s1, w9
 ; CHECK-NEXT:    mov v0.s[1], w10
-; CHECK-NEXT:    mov v2.s[1], w11
-; CHECK-NEXT:    shl v2.2s, v2.2s, #16
-; CHECK-NEXT:    shl v0.2s, v0.2s, #16
-; CHECK-NEXT:    sub v3.2s, v0.2s, v2.2s
-; CHECK-NEXT:    cmlt v4.2s, v3.2s, #0
-; CHECK-NEXT:    mvni v1.2s, #128, lsl #24
-; CHECK-NEXT:    cmgt v2.2s, v2.2s, #0
-; CHECK-NEXT:    cmgt v0.2s, v0.2s, v3.2s
-; CHECK-NEXT:    mvn v5.8b, v4.8b
-; CHECK-NEXT:    eor v0.8b, v2.8b, v0.8b
-; CHECK-NEXT:    bsl v1.8b, v4.8b, v5.8b
-; CHECK-NEXT:    bsl v0.8b, v1.8b, v3.8b
-; CHECK-NEXT:    ushr v0.2s, v0.2s, #16
+; CHECK-NEXT:    mov v1.s[1], w11
+; CHECK-NEXT:    sub v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    movi v1.2s, #127, msl #8
+; CHECK-NEXT:    smin v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    mvni v1.2s, #127, msl #8
+; CHECK-NEXT:    smax v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    mov w8, v0.s[1]
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    strh w8, [x2, #2]
@@ -465,16 +444,13 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    shl v1.16b, v1.16b, #4
 ; CHECK-NEXT:    shl v0.16b, v0.16b, #4
-; CHECK-NEXT:    sub v3.16b, v0.16b, v1.16b
-; CHECK-NEXT:    cmlt v4.16b, v3.16b, #0
-; CHECK-NEXT:    movi v2.16b, #127
-; CHECK-NEXT:    cmgt v1.16b, v1.16b, #0
-; CHECK-NEXT:    cmgt v0.16b, v0.16b, v3.16b
-; CHECK-NEXT:    mvn v5.16b, v4.16b
-; CHECK-NEXT:    bsl v2.16b, v4.16b, v5.16b
-; CHECK-NEXT:    eor v0.16b, v1.16b, v0.16b
-; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
+; CHECK-NEXT:    sshr v1.16b, v1.16b, #4
 ; CHECK-NEXT:    sshr v0.16b, v0.16b, #4
+; CHECK-NEXT:    movi v2.16b, #7
+; CHECK-NEXT:    sub v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    smin v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    movi v1.16b, #248
+; CHECK-NEXT:    smax v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %z = call <16 x i4> @llvm.ssub.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
   ret <16 x i4> %z
@@ -485,16 +461,13 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    shl v1.16b, v1.16b, #7
 ; CHECK-NEXT:    shl v0.16b, v0.16b, #7
-; CHECK-NEXT:    sub v3.16b, v0.16b, v1.16b
-; CHECK-NEXT:    cmlt v4.16b, v3.16b, #0
-; CHECK-NEXT:    movi v2.16b, #127
-; CHECK-NEXT:    cmgt v1.16b, v1.16b, #0
-; CHECK-NEXT:    cmgt v0.16b, v0.16b, v3.16b
-; CHECK-NEXT:    mvn v5.16b, v4.16b
-; CHECK-NEXT:    bsl v2.16b, v4.16b, v5.16b
-; CHECK-NEXT:    eor v0.16b, v1.16b, v0.16b
-; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
+; CHECK-NEXT:    sshr v1.16b, v1.16b, #7
 ; CHECK-NEXT:    sshr v0.16b, v0.16b, #7
+; CHECK-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-NEXT:    sub v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    smin v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    movi v1.2d, #0xffffffffffffffff
+; CHECK-NEXT:    smax v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %z = call <16 x i1> @llvm.ssub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
   ret <16 x i1> %z
diff --git a/llvm/test/CodeGen/AArch64/uadd_sat.ll b/llvm/test/CodeGen/AArch64/uadd_sat.ll
--- a/llvm/test/CodeGen/AArch64/uadd_sat.ll
+++ b/llvm/test/CodeGen/AArch64/uadd_sat.ll
@@ -30,10 +30,11 @@
 define i16 @func16(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: func16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl w8, w0, #16
-; CHECK-NEXT:    adds w8, w8, w1, lsl #16
-; CHECK-NEXT:    csinv w8, w8, wzr, lo
-; CHECK-NEXT:    lsr w0, w8, #16
+; CHECK-NEXT:    and w8, w0, #0xffff
+; CHECK-NEXT:    add w8, w8, w1, uxth
+; CHECK-NEXT:    mov w9, #65535
+; CHECK-NEXT:    cmp w8, w9
+; CHECK-NEXT:    csel w0, w8, w9, lo
 ; CHECK-NEXT:    ret
   %tmp = call i16 @llvm.uadd.sat.i16(i16 %x, i16 %y);
   ret i16 %tmp;
@@ -42,10 +43,11 @@
 define i8 @func8(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: func8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl w8, w0, #24
-; CHECK-NEXT:    adds w8, w8, w1, lsl #24
-; CHECK-NEXT:    csinv w8, w8, wzr, lo
-; CHECK-NEXT:    lsr w0, w8, #24
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    add w8, w8, w1, uxtb
+; CHECK-NEXT:    cmp w8, #255 // =255
+; CHECK-NEXT:    mov w9, #255
+; CHECK-NEXT:    csel w0, w8, w9, lo
 ; CHECK-NEXT:    ret
   %tmp = call i8 @llvm.uadd.sat.i8(i8 %x, i8 %y);
   ret i8 %tmp;
@@ -54,10 +56,12 @@
 define i4 @func3(i4 %x, i4 %y) nounwind {
 ; CHECK-LABEL: func3:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl w8, w0, #28
-; CHECK-NEXT:    adds w8, w8, w1, lsl #28
-; CHECK-NEXT:    csinv w8, w8, wzr, lo
-; CHECK-NEXT:    lsr w0, w8, #28
+; CHECK-NEXT:    and w8, w1, #0xf
+; CHECK-NEXT:    and w9, w0, #0xf
+; CHECK-NEXT:    add w8, w9, w8
+; CHECK-NEXT:    cmp w8, #15 // =15
+; CHECK-NEXT:    mov w9, #15
+; CHECK-NEXT:    csel w0, w8, w9, lo
 ; CHECK-NEXT:    ret
   %tmp = call i4 @llvm.uadd.sat.i4(i4 %x, i4 %y);
   ret i4 %tmp;
diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll b/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll
--- a/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll
+++ b/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll
@@ -33,11 +33,12 @@
 define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind {
 ; CHECK-LABEL: func16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mul w8, w1, w2
-; CHECK-NEXT:    lsl w9, w0, #16
-; CHECK-NEXT:    adds w8, w9, w8, lsl #16
-; CHECK-NEXT:    csinv w8, w8, wzr, lo
-; CHECK-NEXT:    lsr w0, w8, #16
+; CHECK-NEXT:    and w8, w0, #0xffff
+; CHECK-NEXT:    mul w9, w1, w2
+; CHECK-NEXT:    add w8, w8, w9, uxth
+; CHECK-NEXT:    mov w9, #65535
+; CHECK-NEXT:    cmp w8, w9
+; CHECK-NEXT:    csel w0, w8, w9, lo
 ; CHECK-NEXT:    ret
   %a = mul i16 %y, %z
   %tmp = call i16 @llvm.uadd.sat.i16(i16 %x, i16 %a)
@@ -47,11 +48,12 @@
 define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind {
 ; CHECK-LABEL: func8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mul w8, w1, w2
-; CHECK-NEXT:    lsl w9, w0, #24
-; CHECK-NEXT:    adds w8, w9, w8, lsl #24
-; CHECK-NEXT:    csinv w8, w8, wzr, lo
-; CHECK-NEXT:    lsr w0, w8, #24
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    mul w9, w1, w2
+; CHECK-NEXT:    add w8, w8, w9, uxtb
+; CHECK-NEXT:    cmp w8, #255 // =255
+; CHECK-NEXT:    mov w9, #255
+; CHECK-NEXT:    csel w0, w8, w9, lo
 ; CHECK-NEXT:    ret
   %a = mul i8 %y, %z
   %tmp = call i8 @llvm.uadd.sat.i8(i8 %x, i8 %a)
@@ -61,11 +63,13 @@
 define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind {
 ; CHECK-LABEL: func4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mul w8, w1, w2
-; CHECK-NEXT:    lsl w9, w0, #28
-; CHECK-NEXT:    adds w8, w9, w8, lsl #28
-; CHECK-NEXT:    csinv w8, w8, wzr, lo
-; CHECK-NEXT:    lsr w0, w8, #28
+; CHECK-NEXT:    mul w9, w1, w2
+; CHECK-NEXT:    and w8, w0, #0xf
+; CHECK-NEXT:    and w9, w9, #0xf
+; CHECK-NEXT:    add w8, w8, w9
+; CHECK-NEXT:    cmp w8, #15 // =15
+; CHECK-NEXT:    mov w9, #15
+; CHECK-NEXT:    csel w0, w8, w9, lo
 ; CHECK-NEXT:    ret
   %a = mul i4 %y, %z
   %tmp = call i4 @llvm.uadd.sat.i4(i4 %x, i4 %a)
diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
--- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
@@ -142,28 +142,25 @@
 define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
 ; CHECK-LABEL: v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldrb w9, [x1]
 ; CHECK-NEXT:    ldrb w8, [x0]
-; CHECK-NEXT:    ldrb w11, [x1, #1]
+; CHECK-NEXT:    ldrb w9, [x1]
 ; CHECK-NEXT:    ldrb w10, [x0, #1]
-; CHECK-NEXT:    fmov s1, w9
-; CHECK-NEXT:    ldrb w9, [x1, #2]
+; CHECK-NEXT:    ldrb w11, [x1, #1]
+; CHECK-NEXT:    ldrb w12, [x0, #2]
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    ldrb w8, [x0, #2]
-; CHECK-NEXT:    mov v1.h[1], w11
-; CHECK-NEXT:    ldrb w11, [x1, #3]
+; CHECK-NEXT:    ldrb w8, [x1, #2]
+; CHECK-NEXT:    fmov s1, w9
 ; CHECK-NEXT:    mov v0.h[1], w10
-; CHECK-NEXT:    ldrb w10, [x0, #3]
-; CHECK-NEXT:    mov v1.h[2], w9
-; CHECK-NEXT:    mov v0.h[2], w8
-; CHECK-NEXT:    mov v1.h[3], w11
-; CHECK-NEXT:    mov v0.h[3], w10
-; CHECK-NEXT:    shl v1.4h, v1.4h, #8
-; CHECK-NEXT:    shl v0.4h, v0.4h, #8
-; CHECK-NEXT:    mvn v2.8b, v1.8b
-; CHECK-NEXT:    umin v0.4h, v0.4h, v2.4h
+; CHECK-NEXT:    ldrb w9, [x0, #3]
+; CHECK-NEXT:    ldrb w10, [x1, #3]
+; CHECK-NEXT:    mov v1.h[1], w11
+; CHECK-NEXT:    mov v0.h[2], w12
+; CHECK-NEXT:    mov v1.h[2], w8
+; CHECK-NEXT:    mov v0.h[3], w9
+; CHECK-NEXT:    mov v1.h[3], w10
+; CHECK-NEXT:    movi d2, #0xff00ff00ff00ff
 ; CHECK-NEXT:    add v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    ushr v0.4h, v0.4h, #8
+; CHECK-NEXT:    umin v0.4h, v0.4h, v2.4h
 ; CHECK-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-NEXT:    str s0, [x2]
 ; CHECK-NEXT:    ret
@@ -177,20 +174,17 @@
 define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind {
 ; CHECK-LABEL: v2i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldrb w9, [x1]
 ; CHECK-NEXT:    ldrb w8, [x0]
-; CHECK-NEXT:    ldrb w11, [x1, #1]
+; CHECK-NEXT:    ldrb w9, [x1]
 ; CHECK-NEXT:    ldrb w10, [x0, #1]
-; CHECK-NEXT:    fmov s1, w9
+; CHECK-NEXT:    ldrb w11, [x1, #1]
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov v1.s[1], w11
+; CHECK-NEXT:    fmov s2, w9
 ; CHECK-NEXT:    mov v0.s[1], w10
-; CHECK-NEXT:    shl v1.2s, v1.2s, #24
-; CHECK-NEXT:    shl v0.2s, v0.2s, #24
-; CHECK-NEXT:    mvn v2.8b, v1.8b
-; CHECK-NEXT:    umin v0.2s, v0.2s, v2.2s
-; CHECK-NEXT:    add v0.2s, v0.2s, v1.2s
-; CHECK-NEXT:    ushr v0.2s, v0.2s, #24
+; CHECK-NEXT:    mov v2.s[1], w11
+; CHECK-NEXT:    movi d1, #0x0000ff000000ff
+; CHECK-NEXT:    add v0.2s, v0.2s, v2.2s
+; CHECK-NEXT:    umin v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    mov w8, v0.s[1]
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    strb w8, [x2, #1]
@@ -223,20 +217,17 @@
 define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind {
 ; CHECK-LABEL: v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldrh w9, [x1]
 ; CHECK-NEXT:    ldrh w8, [x0]
-; CHECK-NEXT:    ldrh w11, [x1, #2]
+; CHECK-NEXT:    ldrh w9, [x1]
 ; CHECK-NEXT:    ldrh w10, [x0, #2]
-; CHECK-NEXT:    fmov s1, w9
+; CHECK-NEXT:    ldrh w11, [x1, #2]
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov v1.s[1], w11
+; CHECK-NEXT:    fmov s2, w9
 ; CHECK-NEXT:    mov v0.s[1], w10
-; CHECK-NEXT:    shl v1.2s, v1.2s, #16
-; CHECK-NEXT:    shl v0.2s, v0.2s, #16
-; CHECK-NEXT:    mvn v2.8b, v1.8b
-; CHECK-NEXT:    umin v0.2s, v0.2s, v2.2s
-; CHECK-NEXT:    add v0.2s, v0.2s, v1.2s
-; CHECK-NEXT:    ushr v0.2s, v0.2s, #16
+; CHECK-NEXT:    mov v2.s[1], w11
+; CHECK-NEXT:    movi d1, #0x00ffff0000ffff
+; CHECK-NEXT:    add v0.2s, v0.2s, v2.2s
+; CHECK-NEXT:    umin v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    mov w8, v0.s[1]
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    strh w8, [x2, #2]
@@ -318,12 +309,11 @@
 define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
 ; CHECK-LABEL: v16i4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    shl v1.16b, v1.16b, #4
-; CHECK-NEXT:    shl v0.16b, v0.16b, #4
-; CHECK-NEXT:    mvn v2.16b, v1.16b
-; CHECK-NEXT:    umin v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    movi v2.16b, #15
+; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    add v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    ushr v0.16b, v0.16b, #4
+; CHECK-NEXT:    umin v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    ret
   %z = call <16 x i4> @llvm.uadd.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
   ret <16 x i4> %z
@@ -332,12 +322,11 @@
 define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
 ; CHECK-LABEL: v16i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    shl v1.16b, v1.16b, #7
-; CHECK-NEXT:    shl v0.16b, v0.16b, #7
-; CHECK-NEXT:    mvn v2.16b, v1.16b
-; CHECK-NEXT:    umin v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    movi v2.16b, #1
+; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    add v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    ushr v0.16b, v0.16b, #7
+; CHECK-NEXT:    umin v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    ret
   %z = call <16 x i1> @llvm.uadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
   ret <16 x i1> %z
diff --git a/llvm/test/CodeGen/AArch64/usub_sat.ll b/llvm/test/CodeGen/AArch64/usub_sat.ll
--- a/llvm/test/CodeGen/AArch64/usub_sat.ll
+++ b/llvm/test/CodeGen/AArch64/usub_sat.ll
@@ -30,10 +30,11 @@
 define i16 @func16(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: func16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl w8, w0, #16
-; CHECK-NEXT:    subs w8, w8, w1, lsl #16
-; CHECK-NEXT:    csel w8, wzr, w8, lo
-; CHECK-NEXT:    lsr w0, w8, #16
+; CHECK-NEXT:    and w8, w1, #0xffff
+; CHECK-NEXT:    and w9, w0, #0xffff
+; CHECK-NEXT:    cmp w9, w8
+; CHECK-NEXT:    csel w9, w9, w8, hi
+; CHECK-NEXT:    sub w0, w9, w8
 ; CHECK-NEXT:    ret
   %tmp = call i16 @llvm.usub.sat.i16(i16 %x, i16 %y);
   ret i16 %tmp;
@@ -42,10 +43,11 @@
 define i8 @func8(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: func8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl w8, w0, #24
-; CHECK-NEXT:    subs w8, w8, w1, lsl #24
-; CHECK-NEXT:    csel w8, wzr, w8, lo
-; CHECK-NEXT:    lsr w0, w8, #24
+; CHECK-NEXT:    and w8, w1, #0xff
+; CHECK-NEXT:    and w9, w0, #0xff
+; CHECK-NEXT:    cmp w9, w8
+; CHECK-NEXT:    csel w9, w9, w8, hi
+; CHECK-NEXT:    sub w0, w9, w8
 ; CHECK-NEXT:    ret
   %tmp = call i8 @llvm.usub.sat.i8(i8 %x, i8 %y);
   ret i8 %tmp;
@@ -54,10 +56,11 @@
 define i4 @func3(i4 %x, i4 %y) nounwind {
 ; CHECK-LABEL: func3:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl w8, w0, #28
-; CHECK-NEXT:    subs w8, w8, w1, lsl #28
-; CHECK-NEXT:    csel w8, wzr, w8, lo
-; CHECK-NEXT:    lsr w0, w8, #28
+; CHECK-NEXT:    and w8, w1, #0xf
+; CHECK-NEXT:    and w9, w0, #0xf
+; CHECK-NEXT:    cmp w9, w8
+; CHECK-NEXT:    csel w9, w9, w8, hi
+; CHECK-NEXT:    sub w0, w9, w8
 ; CHECK-NEXT:    ret
   %tmp = call i4 @llvm.usub.sat.i4(i4 %x, i4 %y);
   ret i4 %tmp;
diff --git a/llvm/test/CodeGen/AArch64/usub_sat_plus.ll b/llvm/test/CodeGen/AArch64/usub_sat_plus.ll
--- a/llvm/test/CodeGen/AArch64/usub_sat_plus.ll
+++ b/llvm/test/CodeGen/AArch64/usub_sat_plus.ll
@@ -33,11 +33,12 @@
 define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind {
 ; CHECK-LABEL: func16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mul w8, w1, w2
-; CHECK-NEXT:    lsl w9, w0, #16
-; CHECK-NEXT:    subs w8, w9, w8, lsl #16
-; CHECK-NEXT:    csel w8, wzr, w8, lo
-; CHECK-NEXT:    lsr w0, w8, #16
+; CHECK-NEXT:    mul w9, w1, w2
+; CHECK-NEXT:    and w8, w0, #0xffff
+; CHECK-NEXT:    and w9, w9, #0xffff
+; CHECK-NEXT:    cmp w8, w9
+; CHECK-NEXT:    csel w8, w8, w9, hi
+; CHECK-NEXT:    sub w0, w8, w9
 ; CHECK-NEXT:    ret
   %a = mul i16 %y, %z
   %tmp = call i16 @llvm.usub.sat.i16(i16 %x, i16 %a)
@@ -47,11 +48,12 @@
 define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind {
 ; CHECK-LABEL: func8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mul w8, w1, w2
-; CHECK-NEXT:    lsl w9, w0, #24
-; CHECK-NEXT:    subs w8, w9, w8, lsl #24
-; CHECK-NEXT:    csel w8, wzr, w8, lo
-; CHECK-NEXT:    lsr w0, w8, #24
+; CHECK-NEXT:    mul w9, w1, w2
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    and w9, w9, #0xff
+; CHECK-NEXT:    cmp w8, w9
+; CHECK-NEXT:    csel w8, w8, w9, hi
+; CHECK-NEXT:    sub w0, w8, w9
 ; CHECK-NEXT:    ret
   %a = mul i8 %y, %z
   %tmp = call i8 @llvm.usub.sat.i8(i8 %x, i8 %a)
@@ -61,11 +63,12 @@
 define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind {
 ; CHECK-LABEL: func4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mul w8, w1, w2
-; CHECK-NEXT:    lsl w9, w0, #28
-; CHECK-NEXT:    subs w8, w9, w8, lsl #28
-; CHECK-NEXT:    csel w8, wzr, w8, lo
-; CHECK-NEXT:    lsr w0, w8, #28
+; CHECK-NEXT:    mul w9, w1, w2
+; CHECK-NEXT:    and w8, w0, #0xf
+; CHECK-NEXT:    and w9, w9, #0xf
+; CHECK-NEXT:    cmp w8, w9
+; CHECK-NEXT:    csel w8, w8, w9, hi
+; CHECK-NEXT:    sub w0, w8, w9
 ; CHECK-NEXT:    ret
   %a = mul i4 %y, %z
   %tmp = call i4 @llvm.usub.sat.i4(i4 %x, i4 %a)
diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
--- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
@@ -144,11 +144,8 @@
 ; CHECK-NEXT:    mov v1.h[2], w9
 ; CHECK-NEXT:    mov v0.h[3], w10
 ; CHECK-NEXT:    mov v1.h[3], w11
-; CHECK-NEXT:    shl v1.4h, v1.4h, #8
-; CHECK-NEXT:    shl v0.4h, v0.4h, #8
 ; CHECK-NEXT:    umax v0.4h, v0.4h, v1.4h
 ; CHECK-NEXT:    sub v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    ushr v0.4h, v0.4h, #8
 ; CHECK-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-NEXT:    str s0, [x2]
 ; CHECK-NEXT:    ret
@@ -170,11 +167,8 @@
 ; CHECK-NEXT:    fmov s1, w9
 ; CHECK-NEXT:    mov v0.s[1], w10
 ; CHECK-NEXT:    mov v1.s[1], w11
-; CHECK-NEXT:    shl v1.2s, v1.2s, #24
-; CHECK-NEXT:    shl v0.2s, v0.2s, #24
 ; CHECK-NEXT:    umax v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    sub v0.2s, v0.2s, v1.2s
-; CHECK-NEXT:    ushr v0.2s, v0.2s, #24
 ; CHECK-NEXT:    mov w8, v0.s[1]
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    strb w8, [x2, #1]
@@ -214,11 +208,8 @@
 ; CHECK-NEXT:    fmov s1, w9
 ; CHECK-NEXT:    mov v0.s[1], w10
 ; CHECK-NEXT:    mov v1.s[1], w11
-; CHECK-NEXT:    shl v1.2s, v1.2s, #16
-; CHECK-NEXT:    shl v0.2s, v0.2s, #16
 ; CHECK-NEXT:    umax v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    sub v0.2s, v0.2s, v1.2s
-; CHECK-NEXT:    ushr v0.2s, v0.2s, #16
 ; CHECK-NEXT:    mov w8, v0.s[1]
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    strh w8, [x2, #2]
@@ -295,11 +286,11 @@
 define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
 ; CHECK-LABEL: v16i4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    shl v1.16b, v1.16b, #4
-; CHECK-NEXT:    shl v0.16b, v0.16b, #4
+; CHECK-NEXT:    movi v2.16b, #15
+; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    umax v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    sub v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    ushr v0.16b, v0.16b, #4
 ; CHECK-NEXT:    ret
   %z = call <16 x i4> @llvm.usub.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
   ret <16 x i4> %z
@@ -308,11 +299,11 @@
 define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
 ; CHECK-LABEL: v16i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    shl v1.16b, v1.16b, #7
-; CHECK-NEXT:    shl v0.16b, v0.16b, #7
+; CHECK-NEXT:    movi v2.16b, #1
+; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    umax v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    sub v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    ushr v0.16b, v0.16b, #7
 ; CHECK-NEXT:    ret
   %z = call <16 x i1> @llvm.usub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
   ret <16 x i1> %z
diff --git a/llvm/test/CodeGen/ARM/sadd_sat.ll b/llvm/test/CodeGen/ARM/sadd_sat.ll
--- a/llvm/test/CodeGen/ARM/sadd_sat.ll
+++ b/llvm/test/CodeGen/ARM/sadd_sat.ll
@@ -210,67 +210,51 @@
 define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
 ; CHECK-T1-LABEL: func16:
 ; CHECK-T1:       @ %bb.0:
-; CHECK-T1-NEXT:    lsls r3, r1, #16
-; CHECK-T1-NEXT:    lsls r1, r0, #16
-; CHECK-T1-NEXT:    movs r2, #1
-; CHECK-T1-NEXT:    adds r0, r1, r3
-; CHECK-T1-NEXT:    mov r3, r2
-; CHECK-T1-NEXT:    bmi .LBB2_2
+; CHECK-T1-NEXT:    adds r0, r0, r1
+; CHECK-T1-NEXT:    ldr r1, .LCPI2_0
+; CHECK-T1-NEXT:    cmp r0, r1
+; CHECK-T1-NEXT:    blt .LBB2_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r3, #0
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB2_2:
-; CHECK-T1-NEXT:    cmp r3, #0
-; CHECK-T1-NEXT:    bne .LBB2_4
-; CHECK-T1-NEXT:  @ %bb.3:
-; CHECK-T1-NEXT:    lsls r2, r2, #31
+; CHECK-T1-NEXT:    ldr r1, .LCPI2_1
 ; CHECK-T1-NEXT:    cmp r0, r1
-; CHECK-T1-NEXT:    bvs .LBB2_5
-; CHECK-T1-NEXT:    b .LBB2_6
+; CHECK-T1-NEXT:    bgt .LBB2_4
+; CHECK-T1-NEXT:  @ %bb.3:
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB2_4:
-; CHECK-T1-NEXT:    ldr r2, .LCPI2_0
-; CHECK-T1-NEXT:    cmp r0, r1
-; CHECK-T1-NEXT:    bvc .LBB2_6
-; CHECK-T1-NEXT:  .LBB2_5:
-; CHECK-T1-NEXT:    mov r0, r2
-; CHECK-T1-NEXT:  .LBB2_6:
-; CHECK-T1-NEXT:    asrs r0, r0, #16
 ; CHECK-T1-NEXT:    bx lr
 ; CHECK-T1-NEXT:    .p2align 2
-; CHECK-T1-NEXT:  @ %bb.7:
+; CHECK-T1-NEXT:  @ %bb.5:
 ; CHECK-T1-NEXT:  .LCPI2_0:
-; CHECK-T1-NEXT:    .long 2147483647 @ 0x7fffffff
+; CHECK-T1-NEXT:    .long 32767 @ 0x7fff
+; CHECK-T1-NEXT:  .LCPI2_1:
+; CHECK-T1-NEXT:    .long 4294934528 @ 0xffff8000
 ;
 ; CHECK-T2-LABEL: func16:
 ; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    lsls r2, r0, #16
-; CHECK-T2-NEXT:    add.w r1, r2, r1, lsl #16
-; CHECK-T2-NEXT:    movs r2, #0
-; CHECK-T2-NEXT:    cmp r1, #0
-; CHECK-T2-NEXT:    mov.w r3, #-2147483648
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi r2, #1
-; CHECK-T2-NEXT:    cmp r2, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r3, #-2147483648
-; CHECK-T2-NEXT:    cmp.w r1, r0, lsl #16
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r3, r1
-; CHECK-T2-NEXT:    asrs r0, r3, #16
+; CHECK-T2-NEXT:    add r0, r1
+; CHECK-T2-NEXT:    movw r1, #32767
+; CHECK-T2-NEXT:    cmp r0, r1
+; CHECK-T2-NEXT:    it lt
+; CHECK-T2-NEXT:    movlt r1, r0
+; CHECK-T2-NEXT:    movw r0, #32768
+; CHECK-T2-NEXT:    cmn.w r1, #32768
+; CHECK-T2-NEXT:    movt r0, #65535
+; CHECK-T2-NEXT:    it gt
+; CHECK-T2-NEXT:    movgt r0, r1
 ; CHECK-T2-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func16:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    lsl r2, r0, #16
-; CHECK-ARM-NEXT:    add r1, r2, r1, lsl #16
-; CHECK-ARM-NEXT:    mov r2, #0
-; CHECK-ARM-NEXT:    cmp r1, #0
-; CHECK-ARM-NEXT:    movwmi r2, #1
-; CHECK-ARM-NEXT:    mov r3, #-2147483648
-; CHECK-ARM-NEXT:    cmp r2, #0
-; CHECK-ARM-NEXT:    mvnne r3, #-2147483648
-; CHECK-ARM-NEXT:    cmp r1, r0, lsl #16
-; CHECK-ARM-NEXT:    movvc r3, r1
-; CHECK-ARM-NEXT:    asr r0, r3, #16
+; CHECK-ARM-NEXT:    add r0, r0, r1
+; CHECK-ARM-NEXT:    movw r1, #32767
+; CHECK-ARM-NEXT:    cmp r0, r1
+; CHECK-ARM-NEXT:    movlt r1, r0
+; CHECK-ARM-NEXT:    movw r0, #32768
+; CHECK-ARM-NEXT:    movt r0, #65535
+; CHECK-ARM-NEXT:    cmn r1, #32768
+; CHECK-ARM-NEXT:    movgt r0, r1
 ; CHECK-ARM-NEXT:    bx lr
   %tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %y)
   ret i16 %tmp
@@ -279,67 +263,39 @@
 define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
 ; CHECK-T1-LABEL: func8:
 ; CHECK-T1:       @ %bb.0:
-; CHECK-T1-NEXT:    lsls r3, r1, #24
-; CHECK-T1-NEXT:    lsls r1, r0, #24
-; CHECK-T1-NEXT:    movs r2, #1
-; CHECK-T1-NEXT:    adds r0, r1, r3
-; CHECK-T1-NEXT:    mov r3, r2
-; CHECK-T1-NEXT:    bmi .LBB3_2
+; CHECK-T1-NEXT:    adds r0, r0, r1
+; CHECK-T1-NEXT:    movs r1, #127
+; CHECK-T1-NEXT:    cmp r0, #127
+; CHECK-T1-NEXT:    blt .LBB3_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r3, #0
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB3_2:
-; CHECK-T1-NEXT:    cmp r3, #0
-; CHECK-T1-NEXT:    bne .LBB3_4
-; CHECK-T1-NEXT:  @ %bb.3:
-; CHECK-T1-NEXT:    lsls r2, r2, #31
+; CHECK-T1-NEXT:    mvns r1, r1
 ; CHECK-T1-NEXT:    cmp r0, r1
-; CHECK-T1-NEXT:    bvs .LBB3_5
-; CHECK-T1-NEXT:    b .LBB3_6
+; CHECK-T1-NEXT:    bgt .LBB3_4
+; CHECK-T1-NEXT:  @ %bb.3:
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB3_4:
-; CHECK-T1-NEXT:    ldr r2, .LCPI3_0
-; CHECK-T1-NEXT:    cmp r0, r1
-; CHECK-T1-NEXT:    bvc .LBB3_6
-; CHECK-T1-NEXT:  .LBB3_5:
-; CHECK-T1-NEXT:    mov r0, r2
-; CHECK-T1-NEXT:  .LBB3_6:
-; CHECK-T1-NEXT:    asrs r0, r0, #24
 ; CHECK-T1-NEXT:    bx lr
-; CHECK-T1-NEXT:    .p2align 2
-; CHECK-T1-NEXT:  @ %bb.7:
-; CHECK-T1-NEXT:  .LCPI3_0:
-; CHECK-T1-NEXT:    .long 2147483647 @ 0x7fffffff
 ;
 ; CHECK-T2-LABEL: func8:
 ; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    lsls r2, r0, #24
-; CHECK-T2-NEXT:    add.w r1, r2, r1, lsl #24
-; CHECK-T2-NEXT:    movs r2, #0
-; CHECK-T2-NEXT:    cmp r1, #0
-; CHECK-T2-NEXT:    mov.w r3, #-2147483648
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi r2, #1
-; CHECK-T2-NEXT:    cmp r2, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r3, #-2147483648
-; CHECK-T2-NEXT:    cmp.w r1, r0, lsl #24
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r3, r1
-; CHECK-T2-NEXT:    asrs r0, r3, #24
+; CHECK-T2-NEXT:    add r0, r1
+; CHECK-T2-NEXT:    cmp r0, #127
+; CHECK-T2-NEXT:    it ge
+; CHECK-T2-NEXT:    movge r0, #127
+; CHECK-T2-NEXT:    cmn.w r0, #128
+; CHECK-T2-NEXT:    it le
+; CHECK-T2-NEXT:    mvnle r0, #127
 ; CHECK-T2-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func8:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    lsl r2, r0, #24
-; CHECK-ARM-NEXT:    add r1, r2, r1, lsl #24
-; CHECK-ARM-NEXT:    mov r2, #0
-; CHECK-ARM-NEXT:    cmp r1, #0
-; CHECK-ARM-NEXT:    movwmi r2, #1
-; CHECK-ARM-NEXT:    mov r3, #-2147483648
-; CHECK-ARM-NEXT:    cmp r2, #0
-; CHECK-ARM-NEXT:    mvnne r3, #-2147483648
-; CHECK-ARM-NEXT:    cmp r1, r0, lsl #24
-; CHECK-ARM-NEXT:    movvc r3, r1
-; CHECK-ARM-NEXT:    asr r0, r3, #24
+; CHECK-ARM-NEXT:    add r0, r0, r1
+; CHECK-ARM-NEXT:    cmp r0, #127
+; CHECK-ARM-NEXT:    movge r0, #127
+; CHECK-ARM-NEXT:    cmn r0, #128
+; CHECK-ARM-NEXT:    mvnle r0, #127
 ; CHECK-ARM-NEXT:    bx lr
   %tmp = call i8 @llvm.sadd.sat.i8(i8 %x, i8 %y)
   ret i8 %tmp
@@ -348,67 +304,39 @@
 define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
 ; CHECK-T1-LABEL: func3:
 ; CHECK-T1:       @ %bb.0:
-; CHECK-T1-NEXT:    lsls r3, r1, #28
-; CHECK-T1-NEXT:    lsls r1, r0, #28
-; CHECK-T1-NEXT:    movs r2, #1
-; CHECK-T1-NEXT:    adds r0, r1, r3
-; CHECK-T1-NEXT:    mov r3, r2
-; CHECK-T1-NEXT:    bmi .LBB4_2
+; CHECK-T1-NEXT:    adds r0, r0, r1
+; CHECK-T1-NEXT:    movs r1, #7
+; CHECK-T1-NEXT:    cmp r0, #7
+; CHECK-T1-NEXT:    blt .LBB4_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r3, #0
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB4_2:
-; CHECK-T1-NEXT:    cmp r3, #0
-; CHECK-T1-NEXT:    bne .LBB4_4
-; CHECK-T1-NEXT:  @ %bb.3:
-; CHECK-T1-NEXT:    lsls r2, r2, #31
+; CHECK-T1-NEXT:    mvns r1, r1
 ; CHECK-T1-NEXT:    cmp r0, r1
-; CHECK-T1-NEXT:    bvs .LBB4_5
-; CHECK-T1-NEXT:    b .LBB4_6
+; CHECK-T1-NEXT:    bgt .LBB4_4
+; CHECK-T1-NEXT:  @ %bb.3:
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB4_4:
-; CHECK-T1-NEXT:    ldr r2, .LCPI4_0
-; CHECK-T1-NEXT:    cmp r0, r1
-; CHECK-T1-NEXT:    bvc .LBB4_6
-; CHECK-T1-NEXT:  .LBB4_5:
-; CHECK-T1-NEXT:    mov r0, r2
-; CHECK-T1-NEXT:  .LBB4_6:
-; CHECK-T1-NEXT:    asrs r0, r0, #28
 ; CHECK-T1-NEXT:    bx lr
-; CHECK-T1-NEXT:    .p2align 2
-; CHECK-T1-NEXT:  @ %bb.7:
-; CHECK-T1-NEXT:  .LCPI4_0:
-; CHECK-T1-NEXT:    .long 2147483647 @ 0x7fffffff
 ;
 ; CHECK-T2-LABEL: func3:
 ; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    lsls r2, r0, #28
-; CHECK-T2-NEXT:    add.w r1, r2, r1, lsl #28
-; CHECK-T2-NEXT:    movs r2, #0
-; CHECK-T2-NEXT:    cmp r1, #0
-; CHECK-T2-NEXT:    mov.w r3, #-2147483648
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi r2, #1
-; CHECK-T2-NEXT:    cmp r2, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r3, #-2147483648
-; CHECK-T2-NEXT:    cmp.w r1, r0, lsl #28
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r3, r1
-; CHECK-T2-NEXT:    asrs r0, r3, #28
+; CHECK-T2-NEXT:    add r0, r1
+; CHECK-T2-NEXT:    cmp r0, #7
+; CHECK-T2-NEXT:    it ge
+; CHECK-T2-NEXT:    movge r0, #7
+; CHECK-T2-NEXT:    cmn.w r0, #8
+; CHECK-T2-NEXT:    it le
+; CHECK-T2-NEXT:    mvnle r0, #7
 ; CHECK-T2-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func3:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    lsl r2, r0, #28
-; CHECK-ARM-NEXT:    add r1, r2, r1, lsl #28
-; CHECK-ARM-NEXT:    mov r2, #0
-; CHECK-ARM-NEXT:    cmp r1, #0
-; CHECK-ARM-NEXT:    movwmi r2, #1
-; CHECK-ARM-NEXT:    mov r3, #-2147483648
-; CHECK-ARM-NEXT:    cmp r2, #0
-; CHECK-ARM-NEXT:    mvnne r3, #-2147483648
-; CHECK-ARM-NEXT:    cmp r1, r0, lsl #28
-; CHECK-ARM-NEXT:    movvc r3, r1
-; CHECK-ARM-NEXT:    asr r0, r3, #28
+; CHECK-ARM-NEXT:    add r0, r0, r1
+; CHECK-ARM-NEXT:    cmp r0, #7
+; CHECK-ARM-NEXT:    movge r0, #7
+; CHECK-ARM-NEXT:    cmn r0, #8
+; CHECK-ARM-NEXT:    mvnle r0, #7
 ; CHECK-ARM-NEXT:    bx lr
   %tmp = call i4 @llvm.sadd.sat.i4(i4 %x, i4 %y)
   ret i4 %tmp
diff --git a/llvm/test/CodeGen/ARM/sadd_sat_plus.ll b/llvm/test/CodeGen/ARM/sadd_sat_plus.ll
--- a/llvm/test/CodeGen/ARM/sadd_sat_plus.ll
+++ b/llvm/test/CodeGen/ARM/sadd_sat_plus.ll
@@ -217,69 +217,70 @@
 ; CHECK-T1-LABEL: func16:
 ; CHECK-T1:       @ %bb.0:
 ; CHECK-T1-NEXT:    muls r1, r2, r1
-; CHECK-T1-NEXT:    lsls r3, r1, #16
-; CHECK-T1-NEXT:    lsls r1, r0, #16
-; CHECK-T1-NEXT:    movs r2, #1
-; CHECK-T1-NEXT:    adds r0, r1, r3
-; CHECK-T1-NEXT:    mov r3, r2
-; CHECK-T1-NEXT:    bmi .LBB2_2
+; CHECK-T1-NEXT:    sxth r1, r1
+; CHECK-T1-NEXT:    adds r0, r0, r1
+; CHECK-T1-NEXT:    ldr r1, .LCPI2_0
+; CHECK-T1-NEXT:    cmp r0, r1
+; CHECK-T1-NEXT:    blt .LBB2_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r3, #0
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB2_2:
-; CHECK-T1-NEXT:    cmp r3, #0
-; CHECK-T1-NEXT:    bne .LBB2_4
-; CHECK-T1-NEXT:  @ %bb.3:
-; CHECK-T1-NEXT:    lsls r2, r2, #31
+; CHECK-T1-NEXT:    ldr r1, .LCPI2_1
 ; CHECK-T1-NEXT:    cmp r0, r1
-; CHECK-T1-NEXT:    bvs .LBB2_5
-; CHECK-T1-NEXT:    b .LBB2_6
+; CHECK-T1-NEXT:    bgt .LBB2_4
+; CHECK-T1-NEXT:  @ %bb.3:
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB2_4:
-; CHECK-T1-NEXT:    ldr r2, .LCPI2_0
-; CHECK-T1-NEXT:    cmp r0, r1
-; CHECK-T1-NEXT:    bvc .LBB2_6
-; CHECK-T1-NEXT:  .LBB2_5:
-; CHECK-T1-NEXT:    mov r0, r2
-; CHECK-T1-NEXT:  .LBB2_6:
-; CHECK-T1-NEXT:    asrs r0, r0, #16
 ; CHECK-T1-NEXT:    bx lr
 ; CHECK-T1-NEXT:    .p2align 2
-; CHECK-T1-NEXT:  @ %bb.7:
+; CHECK-T1-NEXT:  @ %bb.5:
 ; CHECK-T1-NEXT:  .LCPI2_0:
-; CHECK-T1-NEXT:    .long 2147483647 @ 0x7fffffff
+; CHECK-T1-NEXT:    .long 32767 @ 0x7fff
+; CHECK-T1-NEXT:  .LCPI2_1:
+; CHECK-T1-NEXT:    .long 4294934528 @ 0xffff8000
 ;
-; CHECK-T2-LABEL: func16:
-; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    muls r1, r2, r1
-; CHECK-T2-NEXT:    lsls r2, r0, #16
-; CHECK-T2-NEXT:    mov.w r3, #-2147483648
-; CHECK-T2-NEXT:    add.w r1, r2, r1, lsl #16
-; CHECK-T2-NEXT:    movs r2, #0
-; CHECK-T2-NEXT:    cmp r1, #0
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi r2, #1
-; CHECK-T2-NEXT:    cmp r2, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r3, #-2147483648
-; CHECK-T2-NEXT:    cmp.w r1, r0, lsl #16
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r3, r1
-; CHECK-T2-NEXT:    asrs r0, r3, #16
-; CHECK-T2-NEXT:    bx lr
+; CHECK-T2NODSP-LABEL: func16:
+; CHECK-T2NODSP:       @ %bb.0:
+; CHECK-T2NODSP-NEXT:    muls r1, r2, r1
+; CHECK-T2NODSP-NEXT:    sxth r1, r1
+; CHECK-T2NODSP-NEXT:    add r0, r1
+; CHECK-T2NODSP-NEXT:    movw r1, #32767
+; CHECK-T2NODSP-NEXT:    cmp r0, r1
+; CHECK-T2NODSP-NEXT:    it lt
+; CHECK-T2NODSP-NEXT:    movlt r1, r0
+; CHECK-T2NODSP-NEXT:    movw r0, #32768
+; CHECK-T2NODSP-NEXT:    movt r0, #65535
+; CHECK-T2NODSP-NEXT:    cmn.w r1, #32768
+; CHECK-T2NODSP-NEXT:    it gt
+; CHECK-T2NODSP-NEXT:    movgt r0, r1
+; CHECK-T2NODSP-NEXT:    bx lr
+;
+; CHECK-T2DSP-LABEL: func16:
+; CHECK-T2DSP:       @ %bb.0:
+; CHECK-T2DSP-NEXT:    muls r1, r2, r1
+; CHECK-T2DSP-NEXT:    sxtah r0, r0, r1
+; CHECK-T2DSP-NEXT:    movw r1, #32767
+; CHECK-T2DSP-NEXT:    cmp r0, r1
+; CHECK-T2DSP-NEXT:    it lt
+; CHECK-T2DSP-NEXT:    movlt r1, r0
+; CHECK-T2DSP-NEXT:    movw r0, #32768
+; CHECK-T2DSP-NEXT:    cmn.w r1, #32768
+; CHECK-T2DSP-NEXT:    movt r0, #65535
+; CHECK-T2DSP-NEXT:    it gt
+; CHECK-T2DSP-NEXT:    movgt r0, r1
+; CHECK-T2DSP-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func16:
 ; CHECK-ARM:       @ %bb.0:
 ; CHECK-ARM-NEXT:    smulbb r1, r1, r2
-; CHECK-ARM-NEXT:    lsl r2, r0, #16
-; CHECK-ARM-NEXT:    mov r3, #-2147483648
-; CHECK-ARM-NEXT:    add r1, r2, r1, lsl #16
-; CHECK-ARM-NEXT:    mov r2, #0
-; CHECK-ARM-NEXT:    cmp r1, #0
-; CHECK-ARM-NEXT:    movwmi r2, #1
-; CHECK-ARM-NEXT:    cmp r2, #0
-; CHECK-ARM-NEXT:    mvnne r3, #-2147483648
-; CHECK-ARM-NEXT:    cmp r1, r0, lsl #16
-; CHECK-ARM-NEXT:    movvc r3, r1
-; CHECK-ARM-NEXT:    asr r0, r3, #16
+; CHECK-ARM-NEXT:    sxtah r0, r0, r1
+; CHECK-ARM-NEXT:    movw r1, #32767
+; CHECK-ARM-NEXT:    cmp r0, r1
+; CHECK-ARM-NEXT:    movlt r1, r0
+; CHECK-ARM-NEXT:    movw r0, #32768
+; CHECK-ARM-NEXT:    movt r0, #65535
+; CHECK-ARM-NEXT:    cmn r1, #32768
+; CHECK-ARM-NEXT:    movgt r0, r1
 ; CHECK-ARM-NEXT:    bx lr
   %a = mul i16 %y, %z
   %tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %a)
@@ -290,69 +291,55 @@
 ; CHECK-T1-LABEL: func8:
 ; CHECK-T1:       @ %bb.0:
 ; CHECK-T1-NEXT:    muls r1, r2, r1
-; CHECK-T1-NEXT:    lsls r3, r1, #24
-; CHECK-T1-NEXT:    lsls r1, r0, #24
-; CHECK-T1-NEXT:    movs r2, #1
-; CHECK-T1-NEXT:    adds r0, r1, r3
-; CHECK-T1-NEXT:    mov r3, r2
-; CHECK-T1-NEXT:    bmi .LBB3_2
+; CHECK-T1-NEXT:    sxtb r1, r1
+; CHECK-T1-NEXT:    adds r0, r0, r1
+; CHECK-T1-NEXT:    movs r1, #127
+; CHECK-T1-NEXT:    cmp r0, #127
+; CHECK-T1-NEXT:    blt .LBB3_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r3, #0
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB3_2:
-; CHECK-T1-NEXT:    cmp r3, #0
-; CHECK-T1-NEXT:    bne .LBB3_4
-; CHECK-T1-NEXT:  @ %bb.3:
-; CHECK-T1-NEXT:    lsls r2, r2, #31
+; CHECK-T1-NEXT:    mvns r1, r1
 ; CHECK-T1-NEXT:    cmp r0, r1
-; CHECK-T1-NEXT:    bvs .LBB3_5
-; CHECK-T1-NEXT:    b .LBB3_6
+; CHECK-T1-NEXT:    bgt .LBB3_4
+; CHECK-T1-NEXT:  @ %bb.3:
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB3_4:
-; CHECK-T1-NEXT:    ldr r2, .LCPI3_0
-; CHECK-T1-NEXT:    cmp r0, r1
-; CHECK-T1-NEXT:    bvc .LBB3_6
-; CHECK-T1-NEXT:  .LBB3_5:
-; CHECK-T1-NEXT:    mov r0, r2
-; CHECK-T1-NEXT:  .LBB3_6:
-; CHECK-T1-NEXT:    asrs r0, r0, #24
 ; CHECK-T1-NEXT:    bx lr
-; CHECK-T1-NEXT:    .p2align 2
-; CHECK-T1-NEXT:  @ %bb.7:
-; CHECK-T1-NEXT:  .LCPI3_0:
-; CHECK-T1-NEXT:    .long 2147483647 @ 0x7fffffff
 ;
-; CHECK-T2-LABEL: func8:
-; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    muls r1, r2, r1
-; CHECK-T2-NEXT:    lsls r2, r0, #24
-; CHECK-T2-NEXT:    mov.w r3, #-2147483648
-; CHECK-T2-NEXT:    add.w r1, r2, r1, lsl #24
-; CHECK-T2-NEXT:    movs r2, #0
-; CHECK-T2-NEXT:    cmp r1, #0
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi r2, #1
-; CHECK-T2-NEXT:    cmp r2, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r3, #-2147483648
-; CHECK-T2-NEXT:    cmp.w r1, r0, lsl #24
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r3, r1
-; CHECK-T2-NEXT:    asrs r0, r3, #24
-; CHECK-T2-NEXT:    bx lr
+; CHECK-T2NODSP-LABEL: func8:
+; CHECK-T2NODSP:       @ %bb.0:
+; CHECK-T2NODSP-NEXT:    muls r1, r2, r1
+; CHECK-T2NODSP-NEXT:    sxtb r1, r1
+; CHECK-T2NODSP-NEXT:    add r0, r1
+; CHECK-T2NODSP-NEXT:    cmp r0, #127
+; CHECK-T2NODSP-NEXT:    it ge
+; CHECK-T2NODSP-NEXT:    movge r0, #127
+; CHECK-T2NODSP-NEXT:    cmn.w r0, #128
+; CHECK-T2NODSP-NEXT:    it le
+; CHECK-T2NODSP-NEXT:    mvnle r0, #127
+; CHECK-T2NODSP-NEXT:    bx lr
+;
+; CHECK-T2DSP-LABEL: func8:
+; CHECK-T2DSP:       @ %bb.0:
+; CHECK-T2DSP-NEXT:    muls r1, r2, r1
+; CHECK-T2DSP-NEXT:    sxtab r0, r0, r1
+; CHECK-T2DSP-NEXT:    cmp r0, #127
+; CHECK-T2DSP-NEXT:    it ge
+; CHECK-T2DSP-NEXT:    movge r0, #127
+; CHECK-T2DSP-NEXT:    cmn.w r0, #128
+; CHECK-T2DSP-NEXT:    it le
+; CHECK-T2DSP-NEXT:    mvnle r0, #127
+; CHECK-T2DSP-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func8:
 ; CHECK-ARM:       @ %bb.0:
 ; CHECK-ARM-NEXT:    smulbb r1, r1, r2
-; CHECK-ARM-NEXT:    lsl r2, r0, #24
-; CHECK-ARM-NEXT:    mov r3, #-2147483648
-; CHECK-ARM-NEXT:    add r1, r2, r1, lsl #24
-; CHECK-ARM-NEXT:    mov r2, #0
-; CHECK-ARM-NEXT:    cmp r1, #0
-; CHECK-ARM-NEXT:    movwmi r2, #1
-; CHECK-ARM-NEXT:    cmp r2, #0
-; CHECK-ARM-NEXT:    mvnne r3, #-2147483648
-; CHECK-ARM-NEXT:    cmp r1, r0, lsl #24
-; CHECK-ARM-NEXT:    movvc r3, r1
-; CHECK-ARM-NEXT:    asr r0, r3, #24
+; CHECK-ARM-NEXT:    sxtab r0, r0, r1
+; CHECK-ARM-NEXT:    cmp r0, #127
+; CHECK-ARM-NEXT:    movge r0, #127
+; CHECK-ARM-NEXT:    cmn r0, #128
+; CHECK-ARM-NEXT:    mvnle r0, #127
 ; CHECK-ARM-NEXT:    bx lr
   %a = mul i8 %y, %z
   %tmp = call i8 @llvm.sadd.sat.i8(i8 %x, i8 %a)
@@ -363,69 +350,45 @@
 ; CHECK-T1-LABEL: func4:
 ; CHECK-T1:       @ %bb.0:
 ; CHECK-T1-NEXT:    muls r1, r2, r1
-; CHECK-T1-NEXT:    lsls r3, r1, #28
-; CHECK-T1-NEXT:    lsls r1, r0, #28
-; CHECK-T1-NEXT:    movs r2, #1
-; CHECK-T1-NEXT:    adds r0, r1, r3
-; CHECK-T1-NEXT:    mov r3, r2
-; CHECK-T1-NEXT:    bmi .LBB4_2
+; CHECK-T1-NEXT:    lsls r1, r1, #28
+; CHECK-T1-NEXT:    asrs r1, r1, #28
+; CHECK-T1-NEXT:    adds r0, r0, r1
+; CHECK-T1-NEXT:    movs r1, #7
+; CHECK-T1-NEXT:    cmp r0, #7
+; CHECK-T1-NEXT:    blt .LBB4_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r3, #0
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB4_2:
-; CHECK-T1-NEXT:    cmp r3, #0
-; CHECK-T1-NEXT:    bne .LBB4_4
-; CHECK-T1-NEXT:  @ %bb.3:
-; CHECK-T1-NEXT:    lsls r2, r2, #31
+; CHECK-T1-NEXT:    mvns r1, r1
 ; CHECK-T1-NEXT:    cmp r0, r1
-; CHECK-T1-NEXT:    bvs .LBB4_5
-; CHECK-T1-NEXT:    b .LBB4_6
+; CHECK-T1-NEXT:    bgt .LBB4_4
+; CHECK-T1-NEXT:  @ %bb.3:
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB4_4:
-; CHECK-T1-NEXT:    ldr r2, .LCPI4_0
-; CHECK-T1-NEXT:    cmp r0, r1
-; CHECK-T1-NEXT:    bvc .LBB4_6
-; CHECK-T1-NEXT:  .LBB4_5:
-; CHECK-T1-NEXT:    mov r0, r2
-; CHECK-T1-NEXT:  .LBB4_6:
-; CHECK-T1-NEXT:    asrs r0, r0, #28
 ; CHECK-T1-NEXT:    bx lr
-; CHECK-T1-NEXT:    .p2align 2
-; CHECK-T1-NEXT:  @ %bb.7:
-; CHECK-T1-NEXT:  .LCPI4_0:
-; CHECK-T1-NEXT:    .long 2147483647 @ 0x7fffffff
 ;
 ; CHECK-T2-LABEL: func4:
 ; CHECK-T2:       @ %bb.0:
 ; CHECK-T2-NEXT:    muls r1, r2, r1
-; CHECK-T2-NEXT:    lsls r2, r0, #28
-; CHECK-T2-NEXT:    mov.w r3, #-2147483648
-; CHECK-T2-NEXT:    add.w r1, r2, r1, lsl #28
-; CHECK-T2-NEXT:    movs r2, #0
-; CHECK-T2-NEXT:    cmp r1, #0
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi r2, #1
-; CHECK-T2-NEXT:    cmp r2, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r3, #-2147483648
-; CHECK-T2-NEXT:    cmp.w r1, r0, lsl #28
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r3, r1
-; CHECK-T2-NEXT:    asrs r0, r3, #28
+; CHECK-T2-NEXT:    lsls r1, r1, #28
+; CHECK-T2-NEXT:    add.w r0, r0, r1, asr #28
+; CHECK-T2-NEXT:    cmp r0, #7
+; CHECK-T2-NEXT:    it ge
+; CHECK-T2-NEXT:    movge r0, #7
+; CHECK-T2-NEXT:    cmn.w r0, #8
+; CHECK-T2-NEXT:    it le
+; CHECK-T2-NEXT:    mvnle r0, #7
 ; CHECK-T2-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func4:
 ; CHECK-ARM:       @ %bb.0:
 ; CHECK-ARM-NEXT:    smulbb r1, r1, r2
-; CHECK-ARM-NEXT:    lsl r2, r0, #28
-; CHECK-ARM-NEXT:    mov r3, #-2147483648
-; CHECK-ARM-NEXT:    add r1, r2, r1, lsl #28
-; CHECK-ARM-NEXT:    mov r2, #0
-; CHECK-ARM-NEXT:    cmp r1, #0
-; CHECK-ARM-NEXT:    movwmi r2, #1
-; CHECK-ARM-NEXT:    cmp r2, #0
-; CHECK-ARM-NEXT:    mvnne r3, #-2147483648
-; CHECK-ARM-NEXT:    cmp r1, r0, lsl #28
-; CHECK-ARM-NEXT:    movvc r3, r1
-; CHECK-ARM-NEXT:    asr r0, r3, #28
+; CHECK-ARM-NEXT:    lsl r1, r1, #28
+; CHECK-ARM-NEXT:    add r0, r0, r1, asr #28
+; CHECK-ARM-NEXT:    cmp r0, #7
+; CHECK-ARM-NEXT:    movge r0, #7
+; CHECK-ARM-NEXT:    cmn r0, #8
+; CHECK-ARM-NEXT:    mvnle r0, #7
 ; CHECK-ARM-NEXT:    bx lr
   %a = mul i4 %y, %z
   %tmp = call i4 @llvm.sadd.sat.i4(i4 %x, i4 %a)
diff --git a/llvm/test/CodeGen/ARM/ssub_sat.ll b/llvm/test/CodeGen/ARM/ssub_sat.ll
--- a/llvm/test/CodeGen/ARM/ssub_sat.ll
+++ b/llvm/test/CodeGen/ARM/ssub_sat.ll
@@ -212,69 +212,51 @@
 define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
 ; CHECK-T1-LABEL: func16:
 ; CHECK-T1:       @ %bb.0:
-; CHECK-T1-NEXT:    .save {r4, lr}
-; CHECK-T1-NEXT:    push {r4, lr}
-; CHECK-T1-NEXT:    lsls r1, r1, #16
-; CHECK-T1-NEXT:    lsls r2, r0, #16
-; CHECK-T1-NEXT:    movs r3, #1
-; CHECK-T1-NEXT:    subs r0, r2, r1
-; CHECK-T1-NEXT:    mov r4, r3
-; CHECK-T1-NEXT:    bmi .LBB2_2
+; CHECK-T1-NEXT:    subs r0, r0, r1
+; CHECK-T1-NEXT:    ldr r1, .LCPI2_0
+; CHECK-T1-NEXT:    cmp r0, r1
+; CHECK-T1-NEXT:    blt .LBB2_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r4, #0
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB2_2:
-; CHECK-T1-NEXT:    cmp r4, #0
-; CHECK-T1-NEXT:    bne .LBB2_4
+; CHECK-T1-NEXT:    ldr r1, .LCPI2_1
+; CHECK-T1-NEXT:    cmp r0, r1
+; CHECK-T1-NEXT:    bgt .LBB2_4
 ; CHECK-T1-NEXT:  @ %bb.3:
-; CHECK-T1-NEXT:    lsls r3, r3, #31
-; CHECK-T1-NEXT:    cmp r2, r1
-; CHECK-T1-NEXT:    bvs .LBB2_5
-; CHECK-T1-NEXT:    b .LBB2_6
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB2_4:
-; CHECK-T1-NEXT:    ldr r3, .LCPI2_0
-; CHECK-T1-NEXT:    cmp r2, r1
-; CHECK-T1-NEXT:    bvc .LBB2_6
-; CHECK-T1-NEXT:  .LBB2_5:
-; CHECK-T1-NEXT:    mov r0, r3
-; CHECK-T1-NEXT:  .LBB2_6:
-; CHECK-T1-NEXT:    asrs r0, r0, #16
-; CHECK-T1-NEXT:    pop {r4, pc}
+; CHECK-T1-NEXT:    bx lr
 ; CHECK-T1-NEXT:    .p2align 2
-; CHECK-T1-NEXT:  @ %bb.7:
+; CHECK-T1-NEXT:  @ %bb.5:
 ; CHECK-T1-NEXT:  .LCPI2_0:
-; CHECK-T1-NEXT:    .long 2147483647 @ 0x7fffffff
+; CHECK-T1-NEXT:    .long 32767 @ 0x7fff
+; CHECK-T1-NEXT:  .LCPI2_1:
+; CHECK-T1-NEXT:    .long 4294934528 @ 0xffff8000
 ;
 ; CHECK-T2-LABEL: func16:
 ; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    lsls r0, r0, #16
-; CHECK-T2-NEXT:    sub.w r12, r0, r1, lsl #16
-; CHECK-T2-NEXT:    movs r3, #0
-; CHECK-T2-NEXT:    cmp.w r12, #0
-; CHECK-T2-NEXT:    mov.w r2, #-2147483648
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi r3, #1
-; CHECK-T2-NEXT:    cmp r3, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r2, #-2147483648
-; CHECK-T2-NEXT:    cmp.w r0, r1, lsl #16
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r2, r12
-; CHECK-T2-NEXT:    asrs r0, r2, #16
+; CHECK-T2-NEXT:    subs r0, r0, r1
+; CHECK-T2-NEXT:    movw r1, #32767
+; CHECK-T2-NEXT:    cmp r0, r1
+; CHECK-T2-NEXT:    it lt
+; CHECK-T2-NEXT:    movlt r1, r0
+; CHECK-T2-NEXT:    movw r0, #32768
+; CHECK-T2-NEXT:    cmn.w r1, #32768
+; CHECK-T2-NEXT:    movt r0, #65535
+; CHECK-T2-NEXT:    it gt
+; CHECK-T2-NEXT:    movgt r0, r1
 ; CHECK-T2-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func16:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    lsl r0, r0, #16
-; CHECK-ARM-NEXT:    sub r12, r0, r1, lsl #16
-; CHECK-ARM-NEXT:    mov r3, #0
-; CHECK-ARM-NEXT:    cmp r12, #0
-; CHECK-ARM-NEXT:    movwmi r3, #1
-; CHECK-ARM-NEXT:    mov r2, #-2147483648
-; CHECK-ARM-NEXT:    cmp r3, #0
-; CHECK-ARM-NEXT:    mvnne r2, #-2147483648
-; CHECK-ARM-NEXT:    cmp r0, r1, lsl #16
-; CHECK-ARM-NEXT:    movvc r2, r12
-; CHECK-ARM-NEXT:    asr r0, r2, #16
+; CHECK-ARM-NEXT:    sub r0, r0, r1
+; CHECK-ARM-NEXT:    movw r1, #32767
+; CHECK-ARM-NEXT:    cmp r0, r1
+; CHECK-ARM-NEXT:    movlt r1, r0
+; CHECK-ARM-NEXT:    movw r0, #32768
+; CHECK-ARM-NEXT:    movt r0, #65535
+; CHECK-ARM-NEXT:    cmn r1, #32768
+; CHECK-ARM-NEXT:    movgt r0, r1
 ; CHECK-ARM-NEXT:    bx lr
   %tmp = call i16 @llvm.ssub.sat.i16(i16 %x, i16 %y)
   ret i16 %tmp
@@ -283,69 +265,39 @@
 define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
 ; CHECK-T1-LABEL: func8:
 ; CHECK-T1:       @ %bb.0:
-; CHECK-T1-NEXT:    .save {r4, lr}
-; CHECK-T1-NEXT:    push {r4, lr}
-; CHECK-T1-NEXT:    lsls r1, r1, #24
-; CHECK-T1-NEXT:    lsls r2, r0, #24
-; CHECK-T1-NEXT:    movs r3, #1
-; CHECK-T1-NEXT:    subs r0, r2, r1
-; CHECK-T1-NEXT:    mov r4, r3
-; CHECK-T1-NEXT:    bmi .LBB3_2
+; CHECK-T1-NEXT:    subs r0, r0, r1
+; CHECK-T1-NEXT:    movs r1, #127
+; CHECK-T1-NEXT:    cmp r0, #127
+; CHECK-T1-NEXT:    blt .LBB3_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r4, #0
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB3_2:
-; CHECK-T1-NEXT:    cmp r4, #0
-; CHECK-T1-NEXT:    bne .LBB3_4
+; CHECK-T1-NEXT:    mvns r1, r1
+; CHECK-T1-NEXT:    cmp r0, r1
+; CHECK-T1-NEXT:    bgt .LBB3_4
 ; CHECK-T1-NEXT:  @ %bb.3:
-; CHECK-T1-NEXT:    lsls r3, r3, #31
-; CHECK-T1-NEXT:    cmp r2, r1
-; CHECK-T1-NEXT:    bvs .LBB3_5
-; CHECK-T1-NEXT:    b .LBB3_6
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB3_4:
-; CHECK-T1-NEXT:    ldr r3, .LCPI3_0
-; CHECK-T1-NEXT:    cmp r2, r1
-; CHECK-T1-NEXT:    bvc .LBB3_6
-; CHECK-T1-NEXT:  .LBB3_5:
-; CHECK-T1-NEXT:    mov r0, r3
-; CHECK-T1-NEXT:  .LBB3_6:
-; CHECK-T1-NEXT:    asrs r0, r0, #24
-; CHECK-T1-NEXT:    pop {r4, pc}
-; CHECK-T1-NEXT:    .p2align 2
-; CHECK-T1-NEXT:  @ %bb.7:
-; CHECK-T1-NEXT:  .LCPI3_0:
-; CHECK-T1-NEXT:    .long 2147483647 @ 0x7fffffff
+; CHECK-T1-NEXT:    bx lr
 ;
 ; CHECK-T2-LABEL: func8:
 ; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    lsls r0, r0, #24
-; CHECK-T2-NEXT:    sub.w r12, r0, r1, lsl #24
-; CHECK-T2-NEXT:    movs r3, #0
-; CHECK-T2-NEXT:    cmp.w r12, #0
-; CHECK-T2-NEXT:    mov.w r2, #-2147483648
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi r3, #1
-; CHECK-T2-NEXT:    cmp r3, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r2, #-2147483648
-; CHECK-T2-NEXT:    cmp.w r0, r1, lsl #24
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r2, r12
-; CHECK-T2-NEXT:    asrs r0, r2, #24
+; CHECK-T2-NEXT:    subs r0, r0, r1
+; CHECK-T2-NEXT:    cmp r0, #127
+; CHECK-T2-NEXT:    it ge
+; CHECK-T2-NEXT:    movge r0, #127
+; CHECK-T2-NEXT:    cmn.w r0, #128
+; CHECK-T2-NEXT:    it le
+; CHECK-T2-NEXT:    mvnle r0, #127
 ; CHECK-T2-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func8:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    lsl r0, r0, #24
-; CHECK-ARM-NEXT:    sub r12, r0, r1, lsl #24
-; CHECK-ARM-NEXT:    mov r3, #0
-; CHECK-ARM-NEXT:    cmp r12, #0
-; CHECK-ARM-NEXT:    movwmi r3, #1
-; CHECK-ARM-NEXT:    mov r2, #-2147483648
-; CHECK-ARM-NEXT:    cmp r3, #0
-; CHECK-ARM-NEXT:    mvnne r2, #-2147483648
-; CHECK-ARM-NEXT:    cmp r0, r1, lsl #24
-; CHECK-ARM-NEXT:    movvc r2, r12
-; CHECK-ARM-NEXT:    asr r0, r2, #24
+; CHECK-ARM-NEXT:    sub r0, r0, r1
+; CHECK-ARM-NEXT:    cmp r0, #127
+; CHECK-ARM-NEXT:    movge r0, #127
+; CHECK-ARM-NEXT:    cmn r0, #128
+; CHECK-ARM-NEXT:    mvnle r0, #127
 ; CHECK-ARM-NEXT:    bx lr
   %tmp = call i8 @llvm.ssub.sat.i8(i8 %x, i8 %y)
   ret i8 %tmp
@@ -354,69 +306,39 @@
 define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
 ; CHECK-T1-LABEL: func3:
 ; CHECK-T1:       @ %bb.0:
-; CHECK-T1-NEXT:    .save {r4, lr}
-; CHECK-T1-NEXT:    push {r4, lr}
-; CHECK-T1-NEXT:    lsls r1, r1, #28
-; CHECK-T1-NEXT:    lsls r2, r0, #28
-; CHECK-T1-NEXT:    movs r3, #1
-; CHECK-T1-NEXT:    subs r0, r2, r1
-; CHECK-T1-NEXT:    mov r4, r3
-; CHECK-T1-NEXT:    bmi .LBB4_2
+; CHECK-T1-NEXT:    subs r0, r0, r1
+; CHECK-T1-NEXT:    movs r1, #7
+; CHECK-T1-NEXT:    cmp r0, #7
+; CHECK-T1-NEXT:    blt .LBB4_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r4, #0
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB4_2:
-; CHECK-T1-NEXT:    cmp r4, #0
-; CHECK-T1-NEXT:    bne .LBB4_4
+; CHECK-T1-NEXT:    mvns r1, r1
+; CHECK-T1-NEXT:    cmp r0, r1
+; CHECK-T1-NEXT:    bgt .LBB4_4
 ; CHECK-T1-NEXT:  @ %bb.3:
-; CHECK-T1-NEXT:    lsls r3, r3, #31
-; CHECK-T1-NEXT:    cmp r2, r1
-; CHECK-T1-NEXT:    bvs .LBB4_5
-; CHECK-T1-NEXT:    b .LBB4_6
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB4_4:
-; CHECK-T1-NEXT:    ldr r3, .LCPI4_0
-; CHECK-T1-NEXT:    cmp r2, r1
-; CHECK-T1-NEXT:    bvc .LBB4_6
-; CHECK-T1-NEXT:  .LBB4_5:
-; CHECK-T1-NEXT:    mov r0, r3
-; CHECK-T1-NEXT:  .LBB4_6:
-; CHECK-T1-NEXT:    asrs r0, r0, #28
-; CHECK-T1-NEXT:    pop {r4, pc}
-; CHECK-T1-NEXT:    .p2align 2
-; CHECK-T1-NEXT:  @ %bb.7:
-; CHECK-T1-NEXT:  .LCPI4_0:
-; CHECK-T1-NEXT:    .long 2147483647 @ 0x7fffffff
+; CHECK-T1-NEXT:    bx lr
 ;
 ; CHECK-T2-LABEL: func3:
 ; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    lsls r0, r0, #28
-; CHECK-T2-NEXT:    sub.w r12, r0, r1, lsl #28
-; CHECK-T2-NEXT:    movs r3, #0
-; CHECK-T2-NEXT:    cmp.w r12, #0
-; CHECK-T2-NEXT:    mov.w r2, #-2147483648
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi r3, #1
-; CHECK-T2-NEXT:    cmp r3, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r2, #-2147483648
-; CHECK-T2-NEXT:    cmp.w r0, r1, lsl #28
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r2, r12
-; CHECK-T2-NEXT:    asrs r0, r2, #28
+; CHECK-T2-NEXT:    subs r0, r0, r1
+; CHECK-T2-NEXT:    cmp r0, #7
+; CHECK-T2-NEXT:    it ge
+; CHECK-T2-NEXT:    movge r0, #7
+; CHECK-T2-NEXT:    cmn.w r0, #8
+; CHECK-T2-NEXT:    it le
+; CHECK-T2-NEXT:    mvnle r0, #7
 ; CHECK-T2-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func3:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    lsl r0, r0, #28
-; CHECK-ARM-NEXT:    sub r12, r0, r1, lsl #28
-; CHECK-ARM-NEXT:    mov r3, #0
-; CHECK-ARM-NEXT:    cmp r12, #0
-; CHECK-ARM-NEXT:    movwmi r3, #1
-; CHECK-ARM-NEXT:    mov r2, #-2147483648
-; CHECK-ARM-NEXT:    cmp r3, #0
-; CHECK-ARM-NEXT:    mvnne r2, #-2147483648
-; CHECK-ARM-NEXT:    cmp r0, r1, lsl #28
-; CHECK-ARM-NEXT:    movvc r2, r12
-; CHECK-ARM-NEXT:    asr r0, r2, #28
+; CHECK-ARM-NEXT:    sub r0, r0, r1
+; CHECK-ARM-NEXT:    cmp r0, #7
+; CHECK-ARM-NEXT:    movge r0, #7
+; CHECK-ARM-NEXT:    cmn r0, #8
+; CHECK-ARM-NEXT:    mvnle r0, #7
 ; CHECK-ARM-NEXT:    bx lr
   %tmp = call i4 @llvm.ssub.sat.i4(i4 %x, i4 %y)
   ret i4 %tmp
diff --git a/llvm/test/CodeGen/ARM/ssub_sat_plus.ll b/llvm/test/CodeGen/ARM/ssub_sat_plus.ll
--- a/llvm/test/CodeGen/ARM/ssub_sat_plus.ll
+++ b/llvm/test/CodeGen/ARM/ssub_sat_plus.ll
@@ -222,72 +222,57 @@
 define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounwind {
 ; CHECK-T1-LABEL: func16:
 ; CHECK-T1:       @ %bb.0:
-; CHECK-T1-NEXT:    .save {r4, lr}
-; CHECK-T1-NEXT:    push {r4, lr}
 ; CHECK-T1-NEXT:    muls r1, r2, r1
-; CHECK-T1-NEXT:    lsls r1, r1, #16
-; CHECK-T1-NEXT:    lsls r2, r0, #16
-; CHECK-T1-NEXT:    movs r3, #1
-; CHECK-T1-NEXT:    subs r0, r2, r1
-; CHECK-T1-NEXT:    mov r4, r3
-; CHECK-T1-NEXT:    bmi .LBB2_2
+; CHECK-T1-NEXT:    sxth r1, r1
+; CHECK-T1-NEXT:    subs r0, r0, r1
+; CHECK-T1-NEXT:    ldr r1, .LCPI2_0
+; CHECK-T1-NEXT:    cmp r0, r1
+; CHECK-T1-NEXT:    blt .LBB2_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r4, #0
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB2_2:
-; CHECK-T1-NEXT:    cmp r4, #0
-; CHECK-T1-NEXT:    bne .LBB2_4
+; CHECK-T1-NEXT:    ldr r1, .LCPI2_1
+; CHECK-T1-NEXT:    cmp r0, r1
+; CHECK-T1-NEXT:    bgt .LBB2_4
 ; CHECK-T1-NEXT:  @ %bb.3:
-; CHECK-T1-NEXT:    lsls r3, r3, #31
-; CHECK-T1-NEXT:    cmp r2, r1
-; CHECK-T1-NEXT:    bvs .LBB2_5
-; CHECK-T1-NEXT:    b .LBB2_6
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB2_4:
-; CHECK-T1-NEXT:    ldr r3, .LCPI2_0
-; CHECK-T1-NEXT:    cmp r2, r1
-; CHECK-T1-NEXT:    bvc .LBB2_6
-; CHECK-T1-NEXT:  .LBB2_5:
-; CHECK-T1-NEXT:    mov r0, r3
-; CHECK-T1-NEXT:  .LBB2_6:
-; CHECK-T1-NEXT:    asrs r0, r0, #16
-; CHECK-T1-NEXT:    pop {r4, pc}
+; CHECK-T1-NEXT:    bx lr
 ; CHECK-T1-NEXT:    .p2align 2
-; CHECK-T1-NEXT:  @ %bb.7:
+; CHECK-T1-NEXT:  @ %bb.5:
 ; CHECK-T1-NEXT:  .LCPI2_0:
-; CHECK-T1-NEXT:    .long 2147483647 @ 0x7fffffff
+; CHECK-T1-NEXT:    .long 32767 @ 0x7fff
+; CHECK-T1-NEXT:  .LCPI2_1:
+; CHECK-T1-NEXT:    .long 4294934528 @ 0xffff8000
 ;
 ; CHECK-T2-LABEL: func16:
 ; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    mul r12, r1, r2
-; CHECK-T2-NEXT:    lsls r0, r0, #16
-; CHECK-T2-NEXT:    movs r3, #0
-; CHECK-T2-NEXT:    mov.w r1, #-2147483648
-; CHECK-T2-NEXT:    sub.w r2, r0, r12, lsl #16
-; CHECK-T2-NEXT:    cmp r2, #0
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi r3, #1
-; CHECK-T2-NEXT:    cmp r3, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r1, #-2147483648
-; CHECK-T2-NEXT:    cmp.w r0, r12, lsl #16
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r1, r2
-; CHECK-T2-NEXT:    asrs r0, r1, #16
+; CHECK-T2-NEXT:    muls r1, r2, r1
+; CHECK-T2-NEXT:    sxth r1, r1
+; CHECK-T2-NEXT:    subs r0, r0, r1
+; CHECK-T2-NEXT:    movw r1, #32767
+; CHECK-T2-NEXT:    cmp r0, r1
+; CHECK-T2-NEXT:    it lt
+; CHECK-T2-NEXT:    movlt r1, r0
+; CHECK-T2-NEXT:    movw r0, #32768
+; CHECK-T2-NEXT:    movt r0, #65535
+; CHECK-T2-NEXT:    cmn.w r1, #32768
+; CHECK-T2-NEXT:    it gt
+; CHECK-T2-NEXT:    movgt r0, r1
 ; CHECK-T2-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func16:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    smulbb r12, r1, r2
-; CHECK-ARM-NEXT:    lsl r0, r0, #16
-; CHECK-ARM-NEXT:    mov r3, #0
-; CHECK-ARM-NEXT:    mov r1, #-2147483648
-; CHECK-ARM-NEXT:    sub r2, r0, r12, lsl #16
-; CHECK-ARM-NEXT:    cmp r2, #0
-; CHECK-ARM-NEXT:    movwmi r3, #1
-; CHECK-ARM-NEXT:    cmp r3, #0
-; CHECK-ARM-NEXT:    mvnne r1, #-2147483648
-; CHECK-ARM-NEXT:    cmp r0, r12, lsl #16
-; CHECK-ARM-NEXT:    movvc r1, r2
-; CHECK-ARM-NEXT:    asr r0, r1, #16
+; CHECK-ARM-NEXT:    smulbb r1, r1, r2
+; CHECK-ARM-NEXT:    sxth r1, r1
+; CHECK-ARM-NEXT:    sub r0, r0, r1
+; CHECK-ARM-NEXT:    movw r1, #32767
+; CHECK-ARM-NEXT:    cmp r0, r1
+; CHECK-ARM-NEXT:    movlt r1, r0
+; CHECK-ARM-NEXT:    movw r0, #32768
+; CHECK-ARM-NEXT:    movt r0, #65535
+; CHECK-ARM-NEXT:    cmn r1, #32768
+; CHECK-ARM-NEXT:    movgt r0, r1
 ; CHECK-ARM-NEXT:    bx lr
   %a = mul i16 %y, %z
   %tmp = call i16 @llvm.ssub.sat.i16(i16 %x, i16 %a)
@@ -297,72 +282,45 @@
 define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind {
 ; CHECK-T1-LABEL: func8:
 ; CHECK-T1:       @ %bb.0:
-; CHECK-T1-NEXT:    .save {r4, lr}
-; CHECK-T1-NEXT:    push {r4, lr}
 ; CHECK-T1-NEXT:    muls r1, r2, r1
-; CHECK-T1-NEXT:    lsls r1, r1, #24
-; CHECK-T1-NEXT:    lsls r2, r0, #24
-; CHECK-T1-NEXT:    movs r3, #1
-; CHECK-T1-NEXT:    subs r0, r2, r1
-; CHECK-T1-NEXT:    mov r4, r3
-; CHECK-T1-NEXT:    bmi .LBB3_2
+; CHECK-T1-NEXT:    sxtb r1, r1
+; CHECK-T1-NEXT:    subs r0, r0, r1
+; CHECK-T1-NEXT:    movs r1, #127
+; CHECK-T1-NEXT:    cmp r0, #127
+; CHECK-T1-NEXT:    blt .LBB3_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r4, #0
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB3_2:
-; CHECK-T1-NEXT:    cmp r4, #0
-; CHECK-T1-NEXT:    bne .LBB3_4
+; CHECK-T1-NEXT:    mvns r1, r1
+; CHECK-T1-NEXT:    cmp r0, r1
+; CHECK-T1-NEXT:    bgt .LBB3_4
 ; CHECK-T1-NEXT:  @ %bb.3:
-; CHECK-T1-NEXT:    lsls r3, r3, #31
-; CHECK-T1-NEXT:    cmp r2, r1
-; CHECK-T1-NEXT:    bvs .LBB3_5
-; CHECK-T1-NEXT:    b .LBB3_6
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB3_4:
-; CHECK-T1-NEXT:    ldr r3, .LCPI3_0
-; CHECK-T1-NEXT:    cmp r2, r1
-; CHECK-T1-NEXT:    bvc .LBB3_6
-; CHECK-T1-NEXT:  .LBB3_5:
-; CHECK-T1-NEXT:    mov r0, r3
-; CHECK-T1-NEXT:  .LBB3_6:
-; CHECK-T1-NEXT:    asrs r0, r0, #24
-; CHECK-T1-NEXT:    pop {r4, pc}
-; CHECK-T1-NEXT:    .p2align 2
-; CHECK-T1-NEXT:  @ %bb.7:
-; CHECK-T1-NEXT:  .LCPI3_0:
-; CHECK-T1-NEXT:    .long 2147483647 @ 0x7fffffff
+; CHECK-T1-NEXT:    bx lr
 ;
 ; CHECK-T2-LABEL: func8:
 ; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    mul r12, r1, r2
-; CHECK-T2-NEXT:    lsls r0, r0, #24
-; CHECK-T2-NEXT:    movs r3, #0
-; CHECK-T2-NEXT:    mov.w r1, #-2147483648
-; CHECK-T2-NEXT:    sub.w r2, r0, r12, lsl #24
-; CHECK-T2-NEXT:    cmp r2, #0
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi r3, #1
-; CHECK-T2-NEXT:    cmp r3, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r1, #-2147483648
-; CHECK-T2-NEXT:    cmp.w r0, r12, lsl #24
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r1, r2
-; CHECK-T2-NEXT:    asrs r0, r1, #24
+; CHECK-T2-NEXT:    muls r1, r2, r1
+; CHECK-T2-NEXT:    sxtb r1, r1
+; CHECK-T2-NEXT:    subs r0, r0, r1
+; CHECK-T2-NEXT:    cmp r0, #127
+; CHECK-T2-NEXT:    it ge
+; CHECK-T2-NEXT:    movge r0, #127
+; CHECK-T2-NEXT:    cmn.w r0, #128
+; CHECK-T2-NEXT:    it le
+; CHECK-T2-NEXT:    mvnle r0, #127
 ; CHECK-T2-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func8:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    smulbb r12, r1, r2
-; CHECK-ARM-NEXT:    lsl r0, r0, #24
-; CHECK-ARM-NEXT:    mov r3, #0
-; CHECK-ARM-NEXT:    mov r1, #-2147483648
-; CHECK-ARM-NEXT:    sub r2, r0, r12, lsl #24
-; CHECK-ARM-NEXT:    cmp r2, #0
-; CHECK-ARM-NEXT:    movwmi r3, #1
-; CHECK-ARM-NEXT:    cmp r3, #0
-; CHECK-ARM-NEXT:    mvnne r1, #-2147483648
-; CHECK-ARM-NEXT:    cmp r0, r12, lsl #24
-; CHECK-ARM-NEXT:    movvc r1, r2
-; CHECK-ARM-NEXT:    asr r0, r1, #24
+; CHECK-ARM-NEXT:    smulbb r1, r1, r2
+; CHECK-ARM-NEXT:    sxtb r1, r1
+; CHECK-ARM-NEXT:    sub r0, r0, r1
+; CHECK-ARM-NEXT:    cmp r0, #127
+; CHECK-ARM-NEXT:    movge r0, #127
+; CHECK-ARM-NEXT:    cmn r0, #128
+; CHECK-ARM-NEXT:    mvnle r0, #127
 ; CHECK-ARM-NEXT:    bx lr
   %a = mul i8 %y, %z
   %tmp = call i8 @llvm.ssub.sat.i8(i8 %x, i8 %a)
@@ -372,72 +330,46 @@
 define signext i4 @func4(i4 signext %x, i4 signext %y, i4 signext %z) nounwind {
 ; CHECK-T1-LABEL: func4:
 ; CHECK-T1:       @ %bb.0:
-; CHECK-T1-NEXT:    .save {r4, lr}
-; CHECK-T1-NEXT:    push {r4, lr}
 ; CHECK-T1-NEXT:    muls r1, r2, r1
 ; CHECK-T1-NEXT:    lsls r1, r1, #28
-; CHECK-T1-NEXT:    lsls r2, r0, #28
-; CHECK-T1-NEXT:    movs r3, #1
-; CHECK-T1-NEXT:    subs r0, r2, r1
-; CHECK-T1-NEXT:    mov r4, r3
-; CHECK-T1-NEXT:    bmi .LBB4_2
+; CHECK-T1-NEXT:    asrs r1, r1, #28
+; CHECK-T1-NEXT:    subs r0, r0, r1
+; CHECK-T1-NEXT:    movs r1, #7
+; CHECK-T1-NEXT:    cmp r0, #7
+; CHECK-T1-NEXT:    blt .LBB4_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r4, #0
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB4_2:
-; CHECK-T1-NEXT:    cmp r4, #0
-; CHECK-T1-NEXT:    bne .LBB4_4
+; CHECK-T1-NEXT:    mvns r1, r1
+; CHECK-T1-NEXT:    cmp r0, r1
+; CHECK-T1-NEXT:    bgt .LBB4_4
 ; CHECK-T1-NEXT:  @ %bb.3:
-; CHECK-T1-NEXT:    lsls r3, r3, #31
-; CHECK-T1-NEXT:    cmp r2, r1
-; CHECK-T1-NEXT:    bvs .LBB4_5
-; CHECK-T1-NEXT:    b .LBB4_6
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB4_4:
-; CHECK-T1-NEXT:    ldr r3, .LCPI4_0
-; CHECK-T1-NEXT:    cmp r2, r1
-; CHECK-T1-NEXT:    bvc .LBB4_6
-; CHECK-T1-NEXT:  .LBB4_5:
-; CHECK-T1-NEXT:    mov r0, r3
-; CHECK-T1-NEXT:  .LBB4_6:
-; CHECK-T1-NEXT:    asrs r0, r0, #28
-; CHECK-T1-NEXT:    pop {r4, pc}
-; CHECK-T1-NEXT:    .p2align 2
-; CHECK-T1-NEXT:  @ %bb.7:
-; CHECK-T1-NEXT:  .LCPI4_0:
-; CHECK-T1-NEXT:    .long 2147483647 @ 0x7fffffff
+; CHECK-T1-NEXT:    bx lr
 ;
 ; CHECK-T2-LABEL: func4:
 ; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    mul r12, r1, r2
-; CHECK-T2-NEXT:    lsls r0, r0, #28
-; CHECK-T2-NEXT:    movs r3, #0
-; CHECK-T2-NEXT:    mov.w r1, #-2147483648
-; CHECK-T2-NEXT:    sub.w r2, r0, r12, lsl #28
-; CHECK-T2-NEXT:    cmp r2, #0
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi r3, #1
-; CHECK-T2-NEXT:    cmp r3, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r1, #-2147483648
-; CHECK-T2-NEXT:    cmp.w r0, r12, lsl #28
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r1, r2
-; CHECK-T2-NEXT:    asrs r0, r1, #28
+; CHECK-T2-NEXT:    muls r1, r2, r1
+; CHECK-T2-NEXT:    lsls r1, r1, #28
+; CHECK-T2-NEXT:    sub.w r0, r0, r1, asr #28
+; CHECK-T2-NEXT:    cmp r0, #7
+; CHECK-T2-NEXT:    it ge
+; CHECK-T2-NEXT:    movge r0, #7
+; CHECK-T2-NEXT:    cmn.w r0, #8
+; CHECK-T2-NEXT:    it le
+; CHECK-T2-NEXT:    mvnle r0, #7
 ; CHECK-T2-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func4:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    smulbb r12, r1, r2
-; CHECK-ARM-NEXT:    lsl r0, r0, #28
-; CHECK-ARM-NEXT:    mov r3, #0
-; CHECK-ARM-NEXT:    mov r1, #-2147483648
-; CHECK-ARM-NEXT:    sub r2, r0, r12, lsl #28
-; CHECK-ARM-NEXT:    cmp r2, #0
-; CHECK-ARM-NEXT:    movwmi r3, #1
-; CHECK-ARM-NEXT:    cmp r3, #0
-; CHECK-ARM-NEXT:    mvnne r1, #-2147483648
-; CHECK-ARM-NEXT:    cmp r0, r12, lsl #28
-; CHECK-ARM-NEXT:    movvc r1, r2
-; CHECK-ARM-NEXT:    asr r0, r1, #28
+; CHECK-ARM-NEXT:    smulbb r1, r1, r2
+; CHECK-ARM-NEXT:    lsl r1, r1, #28
+; CHECK-ARM-NEXT:    sub r0, r0, r1, asr #28
+; CHECK-ARM-NEXT:    cmp r0, #7
+; CHECK-ARM-NEXT:    movge r0, #7
+; CHECK-ARM-NEXT:    cmn r0, #8
+; CHECK-ARM-NEXT:    mvnle r0, #7
 ; CHECK-ARM-NEXT:    bx lr
   %a = mul i4 %y, %z
   %tmp = call i4 @llvm.ssub.sat.i4(i4 %x, i4 %a)
diff --git a/llvm/test/CodeGen/ARM/uadd_sat.ll b/llvm/test/CodeGen/ARM/uadd_sat.ll
--- a/llvm/test/CodeGen/ARM/uadd_sat.ll
+++ b/llvm/test/CodeGen/ARM/uadd_sat.ll
@@ -93,34 +93,34 @@
 define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y) nounwind {
 ; CHECK-T1-LABEL: func16:
 ; CHECK-T1:       @ %bb.0:
-; CHECK-T1-NEXT:    lsls r1, r1, #16
-; CHECK-T1-NEXT:    lsls r0, r0, #16
 ; CHECK-T1-NEXT:    adds r0, r0, r1
+; CHECK-T1-NEXT:    ldr r1, .LCPI2_0
+; CHECK-T1-NEXT:    cmp r0, r1
 ; CHECK-T1-NEXT:    blo .LBB2_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r0, #0
-; CHECK-T1-NEXT:    mvns r0, r0
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB2_2:
-; CHECK-T1-NEXT:    lsrs r0, r0, #16
 ; CHECK-T1-NEXT:    bx lr
+; CHECK-T1-NEXT:    .p2align 2
+; CHECK-T1-NEXT:  @ %bb.3:
+; CHECK-T1-NEXT:  .LCPI2_0:
+; CHECK-T1-NEXT:    .long 65535 @ 0xffff
 ;
 ; CHECK-T2-LABEL: func16:
 ; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    lsls r2, r0, #16
-; CHECK-T2-NEXT:    add.w r1, r2, r1, lsl #16
-; CHECK-T2-NEXT:    cmp.w r1, r0, lsl #16
+; CHECK-T2-NEXT:    add r1, r0
+; CHECK-T2-NEXT:    movw r0, #65535
+; CHECK-T2-NEXT:    cmp r1, r0
 ; CHECK-T2-NEXT:    it lo
-; CHECK-T2-NEXT:    movlo.w r1, #-1
-; CHECK-T2-NEXT:    lsrs r0, r1, #16
+; CHECK-T2-NEXT:    movlo r0, r1
 ; CHECK-T2-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func16:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    lsl r2, r0, #16
-; CHECK-ARM-NEXT:    add r1, r2, r1, lsl #16
-; CHECK-ARM-NEXT:    cmp r1, r0, lsl #16
-; CHECK-ARM-NEXT:    mvnlo r1, #0
-; CHECK-ARM-NEXT:    lsr r0, r1, #16
+; CHECK-ARM-NEXT:    add r1, r0, r1
+; CHECK-ARM-NEXT:    movw r0, #65535
+; CHECK-ARM-NEXT:    cmp r1, r0
+; CHECK-ARM-NEXT:    movlo r0, r1
 ; CHECK-ARM-NEXT:    bx lr
   %tmp = call i16 @llvm.uadd.sat.i16(i16 %x, i16 %y)
   ret i16 %tmp
@@ -129,34 +129,27 @@
 define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y) nounwind {
 ; CHECK-T1-LABEL: func8:
 ; CHECK-T1:       @ %bb.0:
-; CHECK-T1-NEXT:    lsls r1, r1, #24
-; CHECK-T1-NEXT:    lsls r0, r0, #24
 ; CHECK-T1-NEXT:    adds r0, r0, r1
+; CHECK-T1-NEXT:    cmp r0, #255
 ; CHECK-T1-NEXT:    blo .LBB3_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r0, #0
-; CHECK-T1-NEXT:    mvns r0, r0
+; CHECK-T1-NEXT:    movs r0, #255
 ; CHECK-T1-NEXT:  .LBB3_2:
-; CHECK-T1-NEXT:    lsrs r0, r0, #24
 ; CHECK-T1-NEXT:    bx lr
 ;
 ; CHECK-T2-LABEL: func8:
 ; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    lsls r2, r0, #24
-; CHECK-T2-NEXT:    add.w r1, r2, r1, lsl #24
-; CHECK-T2-NEXT:    cmp.w r1, r0, lsl #24
-; CHECK-T2-NEXT:    it lo
-; CHECK-T2-NEXT:    movlo.w r1, #-1
-; CHECK-T2-NEXT:    lsrs r0, r1, #24
+; CHECK-T2-NEXT:    add r0, r1
+; CHECK-T2-NEXT:    cmp r0, #255
+; CHECK-T2-NEXT:    it hs
+; CHECK-T2-NEXT:    movhs r0, #255
 ; CHECK-T2-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func8:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    lsl r2, r0, #24
-; CHECK-ARM-NEXT:    add r1, r2, r1, lsl #24
-; CHECK-ARM-NEXT:    cmp r1, r0, lsl #24
-; CHECK-ARM-NEXT:    mvnlo r1, #0
-; CHECK-ARM-NEXT:    lsr r0, r1, #24
+; CHECK-ARM-NEXT:    add r0, r0, r1
+; CHECK-ARM-NEXT:    cmp r0, #255
+; CHECK-ARM-NEXT:    movhs r0, #255
 ; CHECK-ARM-NEXT:    bx lr
   %tmp = call i8 @llvm.uadd.sat.i8(i8 %x, i8 %y)
   ret i8 %tmp
@@ -165,34 +158,27 @@
 define zeroext i4 @func3(i4 zeroext %x, i4 zeroext %y) nounwind {
 ; CHECK-T1-LABEL: func3:
 ; CHECK-T1:       @ %bb.0:
-; CHECK-T1-NEXT:    lsls r1, r1, #28
-; CHECK-T1-NEXT:    lsls r0, r0, #28
 ; CHECK-T1-NEXT:    adds r0, r0, r1
+; CHECK-T1-NEXT:    cmp r0, #15
 ; CHECK-T1-NEXT:    blo .LBB4_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r0, #0
-; CHECK-T1-NEXT:    mvns r0, r0
+; CHECK-T1-NEXT:    movs r0, #15
 ; CHECK-T1-NEXT:  .LBB4_2:
-; CHECK-T1-NEXT:    lsrs r0, r0, #28
 ; CHECK-T1-NEXT:    bx lr
 ;
 ; CHECK-T2-LABEL: func3:
 ; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    lsls r2, r0, #28
-; CHECK-T2-NEXT:    add.w r1, r2, r1, lsl #28
-; CHECK-T2-NEXT:    cmp.w r1, r0, lsl #28
-; CHECK-T2-NEXT:    it lo
-; CHECK-T2-NEXT:    movlo.w r1, #-1
-; CHECK-T2-NEXT:    lsrs r0, r1, #28
+; CHECK-T2-NEXT:    add r0, r1
+; CHECK-T2-NEXT:    cmp r0, #15
+; CHECK-T2-NEXT:    it hs
+; CHECK-T2-NEXT:    movhs r0, #15
 ; CHECK-T2-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func3:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    lsl r2, r0, #28
-; CHECK-ARM-NEXT:    add r1, r2, r1, lsl #28
-; CHECK-ARM-NEXT:    cmp r1, r0, lsl #28
-; CHECK-ARM-NEXT:    mvnlo r1, #0
-; CHECK-ARM-NEXT:    lsr r0, r1, #28
+; CHECK-ARM-NEXT:    add r0, r0, r1
+; CHECK-ARM-NEXT:    cmp r0, #15
+; CHECK-ARM-NEXT:    movhs r0, #15
 ; CHECK-ARM-NEXT:    bx lr
   %tmp = call i4 @llvm.uadd.sat.i4(i4 %x, i4 %y)
   ret i4 %tmp
diff --git a/llvm/test/CodeGen/ARM/uadd_sat_plus.ll b/llvm/test/CodeGen/ARM/uadd_sat_plus.ll
--- a/llvm/test/CodeGen/ARM/uadd_sat_plus.ll
+++ b/llvm/test/CodeGen/ARM/uadd_sat_plus.ll
@@ -102,36 +102,48 @@
 ; CHECK-T1-LABEL: func16:
 ; CHECK-T1:       @ %bb.0:
 ; CHECK-T1-NEXT:    muls r1, r2, r1
-; CHECK-T1-NEXT:    lsls r1, r1, #16
-; CHECK-T1-NEXT:    lsls r0, r0, #16
+; CHECK-T1-NEXT:    uxth r1, r1
 ; CHECK-T1-NEXT:    adds r0, r0, r1
+; CHECK-T1-NEXT:    ldr r1, .LCPI2_0
+; CHECK-T1-NEXT:    cmp r0, r1
 ; CHECK-T1-NEXT:    blo .LBB2_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r0, #0
-; CHECK-T1-NEXT:    mvns r0, r0
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB2_2:
-; CHECK-T1-NEXT:    lsrs r0, r0, #16
 ; CHECK-T1-NEXT:    bx lr
+; CHECK-T1-NEXT:    .p2align 2
+; CHECK-T1-NEXT:  @ %bb.3:
+; CHECK-T1-NEXT:  .LCPI2_0:
+; CHECK-T1-NEXT:    .long 65535 @ 0xffff
 ;
-; CHECK-T2-LABEL: func16:
-; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    muls r1, r2, r1
-; CHECK-T2-NEXT:    lsls r2, r0, #16
-; CHECK-T2-NEXT:    add.w r1, r2, r1, lsl #16
-; CHECK-T2-NEXT:    cmp.w r1, r0, lsl #16
-; CHECK-T2-NEXT:    it lo
-; CHECK-T2-NEXT:    movlo.w r1, #-1
-; CHECK-T2-NEXT:    lsrs r0, r1, #16
-; CHECK-T2-NEXT:    bx lr
+; CHECK-T2NODSP-LABEL: func16:
+; CHECK-T2NODSP:       @ %bb.0:
+; CHECK-T2NODSP-NEXT:    muls r1, r2, r1
+; CHECK-T2NODSP-NEXT:    uxth r1, r1
+; CHECK-T2NODSP-NEXT:    add r1, r0
+; CHECK-T2NODSP-NEXT:    movw r0, #65535
+; CHECK-T2NODSP-NEXT:    cmp r1, r0
+; CHECK-T2NODSP-NEXT:    it lo
+; CHECK-T2NODSP-NEXT:    movlo r0, r1
+; CHECK-T2NODSP-NEXT:    bx lr
+;
+; CHECK-T2DSP-LABEL: func16:
+; CHECK-T2DSP:       @ %bb.0:
+; CHECK-T2DSP-NEXT:    muls r1, r2, r1
+; CHECK-T2DSP-NEXT:    uxtah r1, r0, r1
+; CHECK-T2DSP-NEXT:    movw r0, #65535
+; CHECK-T2DSP-NEXT:    cmp r1, r0
+; CHECK-T2DSP-NEXT:    it lo
+; CHECK-T2DSP-NEXT:    movlo r0, r1
+; CHECK-T2DSP-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func16:
 ; CHECK-ARM:       @ %bb.0:
 ; CHECK-ARM-NEXT:    mul r1, r1, r2
-; CHECK-ARM-NEXT:    lsl r2, r0, #16
-; CHECK-ARM-NEXT:    add r1, r2, r1, lsl #16
-; CHECK-ARM-NEXT:    cmp r1, r0, lsl #16
-; CHECK-ARM-NEXT:    mvnlo r1, #0
-; CHECK-ARM-NEXT:    lsr r0, r1, #16
+; CHECK-ARM-NEXT:    uxtah r1, r0, r1
+; CHECK-ARM-NEXT:    movw r0, #65535
+; CHECK-ARM-NEXT:    cmp r1, r0
+; CHECK-ARM-NEXT:    movlo r0, r1
 ; CHECK-ARM-NEXT:    bx lr
   %a = mul i16 %y, %z
   %tmp = call i16 @llvm.uadd.sat.i16(i16 %x, i16 %a)
@@ -142,36 +154,40 @@
 ; CHECK-T1-LABEL: func8:
 ; CHECK-T1:       @ %bb.0:
 ; CHECK-T1-NEXT:    muls r1, r2, r1
-; CHECK-T1-NEXT:    lsls r1, r1, #24
-; CHECK-T1-NEXT:    lsls r0, r0, #24
+; CHECK-T1-NEXT:    uxtb r1, r1
 ; CHECK-T1-NEXT:    adds r0, r0, r1
+; CHECK-T1-NEXT:    cmp r0, #255
 ; CHECK-T1-NEXT:    blo .LBB3_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r0, #0
-; CHECK-T1-NEXT:    mvns r0, r0
+; CHECK-T1-NEXT:    movs r0, #255
 ; CHECK-T1-NEXT:  .LBB3_2:
-; CHECK-T1-NEXT:    lsrs r0, r0, #24
 ; CHECK-T1-NEXT:    bx lr
 ;
-; CHECK-T2-LABEL: func8:
-; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    muls r1, r2, r1
-; CHECK-T2-NEXT:    lsls r2, r0, #24
-; CHECK-T2-NEXT:    add.w r1, r2, r1, lsl #24
-; CHECK-T2-NEXT:    cmp.w r1, r0, lsl #24
-; CHECK-T2-NEXT:    it lo
-; CHECK-T2-NEXT:    movlo.w r1, #-1
-; CHECK-T2-NEXT:    lsrs r0, r1, #24
-; CHECK-T2-NEXT:    bx lr
+; CHECK-T2NODSP-LABEL: func8:
+; CHECK-T2NODSP:       @ %bb.0:
+; CHECK-T2NODSP-NEXT:    muls r1, r2, r1
+; CHECK-T2NODSP-NEXT:    uxtb r1, r1
+; CHECK-T2NODSP-NEXT:    add r0, r1
+; CHECK-T2NODSP-NEXT:    cmp r0, #255
+; CHECK-T2NODSP-NEXT:    it hs
+; CHECK-T2NODSP-NEXT:    movhs r0, #255
+; CHECK-T2NODSP-NEXT:    bx lr
+;
+; CHECK-T2DSP-LABEL: func8:
+; CHECK-T2DSP:       @ %bb.0:
+; CHECK-T2DSP-NEXT:    muls r1, r2, r1
+; CHECK-T2DSP-NEXT:    uxtab r0, r0, r1
+; CHECK-T2DSP-NEXT:    cmp r0, #255
+; CHECK-T2DSP-NEXT:    it hs
+; CHECK-T2DSP-NEXT:    movhs r0, #255
+; CHECK-T2DSP-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func8:
 ; CHECK-ARM:       @ %bb.0:
 ; CHECK-ARM-NEXT:    smulbb r1, r1, r2
-; CHECK-ARM-NEXT:    lsl r2, r0, #24
-; CHECK-ARM-NEXT:    add r1, r2, r1, lsl #24
-; CHECK-ARM-NEXT:    cmp r1, r0, lsl #24
-; CHECK-ARM-NEXT:    mvnlo r1, #0
-; CHECK-ARM-NEXT:    lsr r0, r1, #24
+; CHECK-ARM-NEXT:    uxtab r0, r0, r1
+; CHECK-ARM-NEXT:    cmp r0, #255
+; CHECK-ARM-NEXT:    movhs r0, #255
 ; CHECK-ARM-NEXT:    bx lr
   %a = mul i8 %y, %z
   %tmp = call i8 @llvm.uadd.sat.i8(i8 %x, i8 %a)
@@ -182,36 +198,33 @@
 ; CHECK-T1-LABEL: func4:
 ; CHECK-T1:       @ %bb.0:
 ; CHECK-T1-NEXT:    muls r1, r2, r1
-; CHECK-T1-NEXT:    lsls r1, r1, #28
-; CHECK-T1-NEXT:    lsls r0, r0, #28
+; CHECK-T1-NEXT:    movs r2, #15
+; CHECK-T1-NEXT:    ands r1, r2
 ; CHECK-T1-NEXT:    adds r0, r0, r1
+; CHECK-T1-NEXT:    cmp r0, #15
 ; CHECK-T1-NEXT:    blo .LBB4_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r0, #0
-; CHECK-T1-NEXT:    mvns r0, r0
+; CHECK-T1-NEXT:    mov r0, r2
 ; CHECK-T1-NEXT:  .LBB4_2:
-; CHECK-T1-NEXT:    lsrs r0, r0, #28
 ; CHECK-T1-NEXT:    bx lr
 ;
 ; CHECK-T2-LABEL: func4:
 ; CHECK-T2:       @ %bb.0:
 ; CHECK-T2-NEXT:    muls r1, r2, r1
-; CHECK-T2-NEXT:    lsls r2, r0, #28
-; CHECK-T2-NEXT:    add.w r1, r2, r1, lsl #28
-; CHECK-T2-NEXT:    cmp.w r1, r0, lsl #28
-; CHECK-T2-NEXT:    it lo
-; CHECK-T2-NEXT:    movlo.w r1, #-1
-; CHECK-T2-NEXT:    lsrs r0, r1, #28
+; CHECK-T2-NEXT:    and r1, r1, #15
+; CHECK-T2-NEXT:    add r0, r1
+; CHECK-T2-NEXT:    cmp r0, #15
+; CHECK-T2-NEXT:    it hs
+; CHECK-T2-NEXT:    movhs r0, #15
 ; CHECK-T2-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func4:
 ; CHECK-ARM:       @ %bb.0:
 ; CHECK-ARM-NEXT:    smulbb r1, r1, r2
-; CHECK-ARM-NEXT:    lsl r2, r0, #28
-; CHECK-ARM-NEXT:    add r1, r2, r1, lsl #28
-; CHECK-ARM-NEXT:    cmp r1, r0, lsl #28
-; CHECK-ARM-NEXT:    mvnlo r1, #0
-; CHECK-ARM-NEXT:    lsr r0, r1, #28
+; CHECK-ARM-NEXT:    and r1, r1, #15
+; CHECK-ARM-NEXT:    add r0, r0, r1
+; CHECK-ARM-NEXT:    cmp r0, #15
+; CHECK-ARM-NEXT:    movhs r0, #15
 ; CHECK-ARM-NEXT:    bx lr
   %a = mul i4 %y, %z
   %tmp = call i4 @llvm.uadd.sat.i4(i4 %x, i4 %a)
diff --git a/llvm/test/CodeGen/ARM/usub_sat.ll b/llvm/test/CodeGen/ARM/usub_sat.ll
--- a/llvm/test/CodeGen/ARM/usub_sat.ll
+++ b/llvm/test/CodeGen/ARM/usub_sat.ll
@@ -93,33 +93,30 @@
 define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y) nounwind {
 ; CHECK-T1-LABEL: func16:
 ; CHECK-T1:       @ %bb.0:
-; CHECK-T1-NEXT:    lsls r1, r1, #16
-; CHECK-T1-NEXT:    lsls r0, r0, #16
-; CHECK-T1-NEXT:    subs r0, r0, r1
-; CHECK-T1-NEXT:    bhs .LBB2_2
+; CHECK-T1-NEXT:    cmp r0, r1
+; CHECK-T1-NEXT:    bhi .LBB2_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r0, #0
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB2_2:
-; CHECK-T1-NEXT:    lsrs r0, r0, #16
+; CHECK-T1-NEXT:    subs r0, r0, r1
+; CHECK-T1-NEXT:    uxth r0, r0
 ; CHECK-T1-NEXT:    bx lr
 ;
 ; CHECK-T2-LABEL: func16:
 ; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    lsls r0, r0, #16
-; CHECK-T2-NEXT:    sub.w r2, r0, r1, lsl #16
-; CHECK-T2-NEXT:    cmp.w r0, r1, lsl #16
-; CHECK-T2-NEXT:    it lo
-; CHECK-T2-NEXT:    movlo r2, #0
-; CHECK-T2-NEXT:    lsrs r0, r2, #16
+; CHECK-T2-NEXT:    cmp r0, r1
+; CHECK-T2-NEXT:    it ls
+; CHECK-T2-NEXT:    movls r0, r1
+; CHECK-T2-NEXT:    subs r0, r0, r1
+; CHECK-T2-NEXT:    uxth r0, r0
 ; CHECK-T2-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func16:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    lsl r0, r0, #16
-; CHECK-ARM-NEXT:    sub r2, r0, r1, lsl #16
-; CHECK-ARM-NEXT:    cmp r0, r1, lsl #16
-; CHECK-ARM-NEXT:    movlo r2, #0
-; CHECK-ARM-NEXT:    lsr r0, r2, #16
+; CHECK-ARM-NEXT:    cmp r0, r1
+; CHECK-ARM-NEXT:    movls r0, r1
+; CHECK-ARM-NEXT:    sub r0, r0, r1
+; CHECK-ARM-NEXT:    uxth r0, r0
 ; CHECK-ARM-NEXT:    bx lr
   %tmp = call i16 @llvm.usub.sat.i16(i16 %x, i16 %y)
   ret i16 %tmp
@@ -128,33 +125,30 @@
 define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y) nounwind {
 ; CHECK-T1-LABEL: func8:
 ; CHECK-T1:       @ %bb.0:
-; CHECK-T1-NEXT:    lsls r1, r1, #24
-; CHECK-T1-NEXT:    lsls r0, r0, #24
-; CHECK-T1-NEXT:    subs r0, r0, r1
-; CHECK-T1-NEXT:    bhs .LBB3_2
+; CHECK-T1-NEXT:    cmp r0, r1
+; CHECK-T1-NEXT:    bhi .LBB3_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r0, #0
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB3_2:
-; CHECK-T1-NEXT:    lsrs r0, r0, #24
+; CHECK-T1-NEXT:    subs r0, r0, r1
+; CHECK-T1-NEXT:    uxtb r0, r0
 ; CHECK-T1-NEXT:    bx lr
 ;
 ; CHECK-T2-LABEL: func8:
 ; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    lsls r0, r0, #24
-; CHECK-T2-NEXT:    sub.w r2, r0, r1, lsl #24
-; CHECK-T2-NEXT:    cmp.w r0, r1, lsl #24
-; CHECK-T2-NEXT:    it lo
-; CHECK-T2-NEXT:    movlo r2, #0
-; CHECK-T2-NEXT:    lsrs r0, r2, #24
+; CHECK-T2-NEXT:    cmp r0, r1
+; CHECK-T2-NEXT:    it ls
+; CHECK-T2-NEXT:    movls r0, r1
+; CHECK-T2-NEXT:    subs r0, r0, r1
+; CHECK-T2-NEXT:    uxtb r0, r0
 ; CHECK-T2-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func8:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    lsl r0, r0, #24
-; CHECK-ARM-NEXT:    sub r2, r0, r1, lsl #24
-; CHECK-ARM-NEXT:    cmp r0, r1, lsl #24
-; CHECK-ARM-NEXT:    movlo r2, #0
-; CHECK-ARM-NEXT:    lsr r0, r2, #24
+; CHECK-ARM-NEXT:    cmp r0, r1
+; CHECK-ARM-NEXT:    movls r0, r1
+; CHECK-ARM-NEXT:    sub r0, r0, r1
+; CHECK-ARM-NEXT:    uxtb r0, r0
 ; CHECK-ARM-NEXT:    bx lr
   %tmp = call i8 @llvm.usub.sat.i8(i8 %x, i8 %y)
   ret i8 %tmp
@@ -163,33 +157,31 @@
 define zeroext i4 @func3(i4 zeroext %x, i4 zeroext %y) nounwind {
 ; CHECK-T1-LABEL: func3:
 ; CHECK-T1:       @ %bb.0:
-; CHECK-T1-NEXT:    lsls r1, r1, #28
-; CHECK-T1-NEXT:    lsls r0, r0, #28
-; CHECK-T1-NEXT:    subs r0, r0, r1
-; CHECK-T1-NEXT:    bhs .LBB4_2
+; CHECK-T1-NEXT:    cmp r0, r1
+; CHECK-T1-NEXT:    bhi .LBB4_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r0, #0
+; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:  .LBB4_2:
-; CHECK-T1-NEXT:    lsrs r0, r0, #28
+; CHECK-T1-NEXT:    subs r1, r0, r1
+; CHECK-T1-NEXT:    movs r0, #15
+; CHECK-T1-NEXT:    ands r0, r1
 ; CHECK-T1-NEXT:    bx lr
 ;
 ; CHECK-T2-LABEL: func3:
 ; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    lsls r0, r0, #28
-; CHECK-T2-NEXT:    sub.w r2, r0, r1, lsl #28
-; CHECK-T2-NEXT:    cmp.w r0, r1, lsl #28
-; CHECK-T2-NEXT:    it lo
-; CHECK-T2-NEXT:    movlo r2, #0
-; CHECK-T2-NEXT:    lsrs r0, r2, #28
+; CHECK-T2-NEXT:    cmp r0, r1
+; CHECK-T2-NEXT:    it ls
+; CHECK-T2-NEXT:    movls r0, r1
+; CHECK-T2-NEXT:    subs r0, r0, r1
+; CHECK-T2-NEXT:    and r0, r0, #15
 ; CHECK-T2-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func3:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    lsl r0, r0, #28
-; CHECK-ARM-NEXT:    sub r2, r0, r1, lsl #28
-; CHECK-ARM-NEXT:    cmp r0, r1, lsl #28
-; CHECK-ARM-NEXT:    movlo r2, #0
-; CHECK-ARM-NEXT:    lsr r0, r2, #28
+; CHECK-ARM-NEXT:    cmp r0, r1
+; CHECK-ARM-NEXT:    movls r0, r1
+; CHECK-ARM-NEXT:    sub r0, r0, r1
+; CHECK-ARM-NEXT:    and r0, r0, #15
 ; CHECK-ARM-NEXT:    bx lr
   %tmp = call i4 @llvm.usub.sat.i4(i4 %x, i4 %y)
   ret i4 %tmp
diff --git a/llvm/test/CodeGen/ARM/usub_sat_plus.ll b/llvm/test/CodeGen/ARM/usub_sat_plus.ll
--- a/llvm/test/CodeGen/ARM/usub_sat_plus.ll
+++ b/llvm/test/CodeGen/ARM/usub_sat_plus.ll
@@ -104,35 +104,35 @@
 ; CHECK-T1-LABEL: func16:
 ; CHECK-T1:       @ %bb.0:
 ; CHECK-T1-NEXT:    muls r1, r2, r1
-; CHECK-T1-NEXT:    lsls r1, r1, #16
-; CHECK-T1-NEXT:    lsls r0, r0, #16
-; CHECK-T1-NEXT:    subs r0, r0, r1
-; CHECK-T1-NEXT:    bhs .LBB2_2
+; CHECK-T1-NEXT:    uxth r2, r1
+; CHECK-T1-NEXT:    cmp r0, r2
+; CHECK-T1-NEXT:    bhi .LBB2_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r0, #0
+; CHECK-T1-NEXT:    mov r0, r2
 ; CHECK-T1-NEXT:  .LBB2_2:
-; CHECK-T1-NEXT:    lsrs r0, r0, #16
+; CHECK-T1-NEXT:    subs r0, r0, r1
+; CHECK-T1-NEXT:    uxth r0, r0
 ; CHECK-T1-NEXT:    bx lr
 ;
 ; CHECK-T2-LABEL: func16:
 ; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    muls r1, r2, r1
-; CHECK-T2-NEXT:    lsls r0, r0, #16
-; CHECK-T2-NEXT:    sub.w r2, r0, r1, lsl #16
-; CHECK-T2-NEXT:    cmp.w r0, r1, lsl #16
-; CHECK-T2-NEXT:    it lo
-; CHECK-T2-NEXT:    movlo r2, #0
-; CHECK-T2-NEXT:    lsrs r0, r2, #16
+; CHECK-T2-NEXT:    mul r3, r1, r2
+; CHECK-T2-NEXT:    uxth r3, r3
+; CHECK-T2-NEXT:    cmp r0, r3
+; CHECK-T2-NEXT:    it hi
+; CHECK-T2-NEXT:    movhi r3, r0
+; CHECK-T2-NEXT:    mls r0, r1, r2, r3
+; CHECK-T2-NEXT:    uxth r0, r0
 ; CHECK-T2-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func16:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    mul r1, r1, r2
-; CHECK-ARM-NEXT:    lsl r0, r0, #16
-; CHECK-ARM-NEXT:    sub r2, r0, r1, lsl #16
-; CHECK-ARM-NEXT:    cmp r0, r1, lsl #16
-; CHECK-ARM-NEXT:    movlo r2, #0
-; CHECK-ARM-NEXT:    lsr r0, r2, #16
+; CHECK-ARM-NEXT:    mul r3, r1, r2
+; CHECK-ARM-NEXT:    uxth r3, r3
+; CHECK-ARM-NEXT:    cmp r0, r3
+; CHECK-ARM-NEXT:    movhi r3, r0
+; CHECK-ARM-NEXT:    mls r0, r1, r2, r3
+; CHECK-ARM-NEXT:    uxth r0, r0
 ; CHECK-ARM-NEXT:    bx lr
   %a = mul i16 %y, %z
   %tmp = call i16 @llvm.usub.sat.i16(i16 %x, i16 %a)
@@ -143,35 +143,35 @@
 ; CHECK-T1-LABEL: func8:
 ; CHECK-T1:       @ %bb.0:
 ; CHECK-T1-NEXT:    muls r1, r2, r1
-; CHECK-T1-NEXT:    lsls r1, r1, #24
-; CHECK-T1-NEXT:    lsls r0, r0, #24
-; CHECK-T1-NEXT:    subs r0, r0, r1
-; CHECK-T1-NEXT:    bhs .LBB3_2
+; CHECK-T1-NEXT:    uxtb r2, r1
+; CHECK-T1-NEXT:    cmp r0, r2
+; CHECK-T1-NEXT:    bhi .LBB3_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r0, #0
+; CHECK-T1-NEXT:    mov r0, r2
 ; CHECK-T1-NEXT:  .LBB3_2:
-; CHECK-T1-NEXT:    lsrs r0, r0, #24
+; CHECK-T1-NEXT:    subs r0, r0, r1
+; CHECK-T1-NEXT:    uxtb r0, r0
 ; CHECK-T1-NEXT:    bx lr
 ;
 ; CHECK-T2-LABEL: func8:
 ; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    muls r1, r2, r1
-; CHECK-T2-NEXT:    lsls r0, r0, #24
-; CHECK-T2-NEXT:    sub.w r2, r0, r1, lsl #24
-; CHECK-T2-NEXT:    cmp.w r0, r1, lsl #24
-; CHECK-T2-NEXT:    it lo
-; CHECK-T2-NEXT:    movlo r2, #0
-; CHECK-T2-NEXT:    lsrs r0, r2, #24
+; CHECK-T2-NEXT:    mul r3, r1, r2
+; CHECK-T2-NEXT:    uxtb r3, r3
+; CHECK-T2-NEXT:    cmp r0, r3
+; CHECK-T2-NEXT:    it hi
+; CHECK-T2-NEXT:    movhi r3, r0
+; CHECK-T2-NEXT:    mls r0, r1, r2, r3
+; CHECK-T2-NEXT:    uxtb r0, r0
 ; CHECK-T2-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func8:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    smulbb r1, r1, r2
-; CHECK-ARM-NEXT:    lsl r0, r0, #24
-; CHECK-ARM-NEXT:    sub r2, r0, r1, lsl #24
-; CHECK-ARM-NEXT:    cmp r0, r1, lsl #24
-; CHECK-ARM-NEXT:    movlo r2, #0
-; CHECK-ARM-NEXT:    lsr r0, r2, #24
+; CHECK-ARM-NEXT:    smulbb r3, r1, r2
+; CHECK-ARM-NEXT:    uxtb r3, r3
+; CHECK-ARM-NEXT:    cmp r0, r3
+; CHECK-ARM-NEXT:    movhi r3, r0
+; CHECK-ARM-NEXT:    mls r0, r1, r2, r3
+; CHECK-ARM-NEXT:    uxtb r0, r0
 ; CHECK-ARM-NEXT:    bx lr
   %a = mul i8 %y, %z
   %tmp = call i8 @llvm.usub.sat.i8(i8 %x, i8 %a)
@@ -182,35 +182,37 @@
 ; CHECK-T1-LABEL: func4:
 ; CHECK-T1:       @ %bb.0:
 ; CHECK-T1-NEXT:    muls r1, r2, r1
-; CHECK-T1-NEXT:    lsls r1, r1, #28
-; CHECK-T1-NEXT:    lsls r0, r0, #28
-; CHECK-T1-NEXT:    subs r0, r0, r1
-; CHECK-T1-NEXT:    bhs .LBB4_2
+; CHECK-T1-NEXT:    movs r2, #15
+; CHECK-T1-NEXT:    mov r3, r1
+; CHECK-T1-NEXT:    ands r3, r2
+; CHECK-T1-NEXT:    cmp r0, r3
+; CHECK-T1-NEXT:    bhi .LBB4_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    movs r0, #0
+; CHECK-T1-NEXT:    mov r0, r3
 ; CHECK-T1-NEXT:  .LBB4_2:
-; CHECK-T1-NEXT:    lsrs r0, r0, #28
+; CHECK-T1-NEXT:    subs r0, r0, r1
+; CHECK-T1-NEXT:    ands r0, r2
 ; CHECK-T1-NEXT:    bx lr
 ;
 ; CHECK-T2-LABEL: func4:
 ; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    muls r1, r2, r1
-; CHECK-T2-NEXT:    lsls r0, r0, #28
-; CHECK-T2-NEXT:    sub.w r2, r0, r1, lsl #28
-; CHECK-T2-NEXT:    cmp.w r0, r1, lsl #28
-; CHECK-T2-NEXT:    it lo
-; CHECK-T2-NEXT:    movlo r2, #0
-; CHECK-T2-NEXT:    lsrs r0, r2, #28
+; CHECK-T2-NEXT:    mul r3, r1, r2
+; CHECK-T2-NEXT:    and r3, r3, #15
+; CHECK-T2-NEXT:    cmp r0, r3
+; CHECK-T2-NEXT:    it hi
+; CHECK-T2-NEXT:    movhi r3, r0
+; CHECK-T2-NEXT:    mls r0, r1, r2, r3
+; CHECK-T2-NEXT:    and r0, r0, #15
 ; CHECK-T2-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func4:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    smulbb r1, r1, r2
-; CHECK-ARM-NEXT:    lsl r0, r0, #28
-; CHECK-ARM-NEXT:    sub r2, r0, r1, lsl #28
-; CHECK-ARM-NEXT:    cmp r0, r1, lsl #28
-; CHECK-ARM-NEXT:    movlo r2, #0
-; CHECK-ARM-NEXT:    lsr r0, r2, #28
+; CHECK-ARM-NEXT:    smulbb r3, r1, r2
+; CHECK-ARM-NEXT:    and r3, r3, #15
+; CHECK-ARM-NEXT:    cmp r0, r3
+; CHECK-ARM-NEXT:    movhi r3, r0
+; CHECK-ARM-NEXT:    mls r0, r1, r2, r3
+; CHECK-ARM-NEXT:    and r0, r0, #15
 ; CHECK-ARM-NEXT:    bx lr
   %a = mul i4 %y, %z
   %tmp = call i4 @llvm.usub.sat.i4(i4 %x, i4 %a)
diff --git a/llvm/test/CodeGen/X86/sadd_sat.ll b/llvm/test/CodeGen/X86/sadd_sat.ll
--- a/llvm/test/CodeGen/X86/sadd_sat.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat.ll
@@ -158,36 +158,29 @@
 define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
 ; X86-LABEL: func3:
 ; X86:       # %bb.0:
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %dl
-; X86-NEXT:    shlb $4, %dl
-; X86-NEXT:    shlb $4, %cl
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    movb %cl, %ch
-; X86-NEXT:    addb %dl, %ch
-; X86-NEXT:    setns %al
-; X86-NEXT:    addl $127, %eax
-; X86-NEXT:    addb %dl, %cl
-; X86-NEXT:    movzbl %cl, %ecx
-; X86-NEXT:    cmovol %eax, %ecx
-; X86-NEXT:    sarb $4, %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    addb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movzbl %al, %ecx
+; X86-NEXT:    cmpb $7, %al
+; X86-NEXT:    movl $7, %eax
+; X86-NEXT:    cmovll %ecx, %eax
+; X86-NEXT:    cmpb $-8, %al
+; X86-NEXT:    movl $248, %ecx
+; X86-NEXT:    cmovgl %eax, %ecx
 ; X86-NEXT:    movsbl %cl, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: func3:
 ; X64:       # %bb.0:
-; X64-NEXT:    shlb $4, %sil
-; X64-NEXT:    shlb $4, %dil
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    movl %edi, %ecx
-; X64-NEXT:    addb %sil, %cl
-; X64-NEXT:    setns %al
-; X64-NEXT:    addl $127, %eax
 ; X64-NEXT:    addb %sil, %dil
-; X64-NEXT:    movzbl %dil, %ecx
-; X64-NEXT:    cmovol %eax, %ecx
-; X64-NEXT:    sarb $4, %cl
-; X64-NEXT:    movsbl %cl, %eax
+; X64-NEXT:    movzbl %dil, %eax
+; X64-NEXT:    cmpb $7, %al
+; X64-NEXT:    movl $7, %ecx
+; X64-NEXT:    cmovll %eax, %ecx
+; X64-NEXT:    cmpb $-8, %cl
+; X64-NEXT:    movl $248, %eax
+; X64-NEXT:    cmovgl %ecx, %eax
+; X64-NEXT:    movsbl %al, %eax
 ; X64-NEXT:    retq
   %tmp = call i4 @llvm.sadd.sat.i4(i4 %x, i4 %y);
   ret i4 %tmp;
diff --git a/llvm/test/CodeGen/X86/sadd_sat_plus.ll b/llvm/test/CodeGen/X86/sadd_sat_plus.ll
--- a/llvm/test/CodeGen/X86/sadd_sat_plus.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat_plus.ll
@@ -169,21 +169,19 @@
 define signext i4 @func4(i4 signext %x, i4 signext %y, i4 signext %z) nounwind {
 ; X86-LABEL: func4:
 ; X86:       # %bb.0:
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    mulb {{[0-9]+}}(%esp)
 ; X86-NEXT:    shlb $4, %al
-; X86-NEXT:    shlb $4, %dl
-; X86-NEXT:    xorl %ecx, %ecx
-; X86-NEXT:    movb %dl, %ah
-; X86-NEXT:    addb %al, %ah
-; X86-NEXT:    setns %cl
-; X86-NEXT:    addl $127, %ecx
-; X86-NEXT:    addb %al, %dl
-; X86-NEXT:    movzbl %dl, %eax
-; X86-NEXT:    cmovol %ecx, %eax
 ; X86-NEXT:    sarb $4, %al
-; X86-NEXT:    movsbl %al, %eax
+; X86-NEXT:    addb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movzbl %al, %ecx
+; X86-NEXT:    cmpb $7, %al
+; X86-NEXT:    movl $7, %eax
+; X86-NEXT:    cmovll %ecx, %eax
+; X86-NEXT:    cmpb $-8, %al
+; X86-NEXT:    movl $248, %ecx
+; X86-NEXT:    cmovgl %eax, %ecx
+; X86-NEXT:    movsbl %cl, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: func4:
@@ -192,16 +190,15 @@
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    mulb %dl
 ; X64-NEXT:    shlb $4, %al
-; X64-NEXT:    shlb $4, %dil
-; X64-NEXT:    xorl %ecx, %ecx
-; X64-NEXT:    movl %edi, %edx
-; X64-NEXT:    addb %al, %dl
-; X64-NEXT:    setns %cl
-; X64-NEXT:    addl $127, %ecx
-; X64-NEXT:    addb %al, %dil
-; X64-NEXT:    movzbl %dil, %eax
-; X64-NEXT:    cmovol %ecx, %eax
 ; X64-NEXT:    sarb $4, %al
+; X64-NEXT:    addb %dil, %al
+; X64-NEXT:    movzbl %al, %eax
+; X64-NEXT:    cmpb $7, %al
+; X64-NEXT:    movl $7, %ecx
+; X64-NEXT:    cmovll %eax, %ecx
+; X64-NEXT:    cmpb $-8, %cl
+; X64-NEXT:    movl $248, %eax
+; X64-NEXT:    cmovgl %ecx, %eax
 ; X64-NEXT:    movsbl %al, %eax
 ; X64-NEXT:    retq
   %a = mul i4 %y, %z
diff --git a/llvm/test/CodeGen/X86/ssub_sat.ll b/llvm/test/CodeGen/X86/ssub_sat.ll
--- a/llvm/test/CodeGen/X86/ssub_sat.ll
+++ b/llvm/test/CodeGen/X86/ssub_sat.ll
@@ -147,34 +147,29 @@
 define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
 ; X86-LABEL: func3:
 ; X86:       # %bb.0:
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %dl
-; X86-NEXT:    shlb $4, %dl
-; X86-NEXT:    shlb $4, %cl
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    cmpb %dl, %cl
-; X86-NEXT:    setns %al
-; X86-NEXT:    addl $127, %eax
-; X86-NEXT:    subb %dl, %cl
-; X86-NEXT:    movzbl %cl, %ecx
-; X86-NEXT:    cmovol %eax, %ecx
-; X86-NEXT:    sarb $4, %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    subb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movzbl %al, %ecx
+; X86-NEXT:    cmpb $7, %al
+; X86-NEXT:    movl $7, %eax
+; X86-NEXT:    cmovll %ecx, %eax
+; X86-NEXT:    cmpb $-8, %al
+; X86-NEXT:    movl $248, %ecx
+; X86-NEXT:    cmovgl %eax, %ecx
 ; X86-NEXT:    movsbl %cl, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: func3:
 ; X64:       # %bb.0:
-; X64-NEXT:    shlb $4, %sil
-; X64-NEXT:    shlb $4, %dil
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpb %sil, %dil
-; X64-NEXT:    setns %al
-; X64-NEXT:    addl $127, %eax
 ; X64-NEXT:    subb %sil, %dil
-; X64-NEXT:    movzbl %dil, %ecx
-; X64-NEXT:    cmovol %eax, %ecx
-; X64-NEXT:    sarb $4, %cl
-; X64-NEXT:    movsbl %cl, %eax
+; X64-NEXT:    movzbl %dil, %eax
+; X64-NEXT:    cmpb $7, %al
+; X64-NEXT:    movl $7, %ecx
+; X64-NEXT:    cmovll %eax, %ecx
+; X64-NEXT:    cmpb $-8, %cl
+; X64-NEXT:    movl $248, %eax
+; X64-NEXT:    cmovgl %ecx, %eax
+; X64-NEXT:    movsbl %al, %eax
 ; X64-NEXT:    retq
   %tmp = call i4 @llvm.ssub.sat.i4(i4 %x, i4 %y)
   ret i4 %tmp
diff --git a/llvm/test/CodeGen/X86/ssub_sat_plus.ll b/llvm/test/CodeGen/X86/ssub_sat_plus.ll
--- a/llvm/test/CodeGen/X86/ssub_sat_plus.ll
+++ b/llvm/test/CodeGen/X86/ssub_sat_plus.ll
@@ -158,19 +158,19 @@
 define signext i4 @func4(i4 signext %x, i4 signext %y, i4 signext %z) nounwind {
 ; X86-LABEL: func4:
 ; X86:       # %bb.0:
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    mulb {{[0-9]+}}(%esp)
 ; X86-NEXT:    shlb $4, %al
-; X86-NEXT:    shlb $4, %dl
-; X86-NEXT:    xorl %ecx, %ecx
-; X86-NEXT:    cmpb %al, %dl
-; X86-NEXT:    setns %cl
-; X86-NEXT:    addl $127, %ecx
-; X86-NEXT:    subb %al, %dl
-; X86-NEXT:    movzbl %dl, %eax
-; X86-NEXT:    cmovol %ecx, %eax
 ; X86-NEXT:    sarb $4, %al
+; X86-NEXT:    subb %al, %cl
+; X86-NEXT:    movzbl %cl, %eax
+; X86-NEXT:    cmpb $7, %cl
+; X86-NEXT:    movl $7, %ecx
+; X86-NEXT:    cmovll %eax, %ecx
+; X86-NEXT:    cmpb $-8, %cl
+; X86-NEXT:    movl $248, %eax
+; X86-NEXT:    cmovgl %ecx, %eax
 ; X86-NEXT:    movsbl %al, %eax
 ; X86-NEXT:    retl
 ;
@@ -180,15 +180,15 @@
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    mulb %dl
 ; X64-NEXT:    shlb $4, %al
-; X64-NEXT:    shlb $4, %dil
-; X64-NEXT:    xorl %ecx, %ecx
-; X64-NEXT:    cmpb %al, %dil
-; X64-NEXT:    setns %cl
-; X64-NEXT:    addl $127, %ecx
+; X64-NEXT:    sarb $4, %al
 ; X64-NEXT:    subb %al, %dil
 ; X64-NEXT:    movzbl %dil, %eax
-; X64-NEXT:    cmovol %ecx, %eax
-; X64-NEXT:    sarb $4, %al
+; X64-NEXT:    cmpb $7, %al
+; X64-NEXT:    movl $7, %ecx
+; X64-NEXT:    cmovll %eax, %ecx
+; X64-NEXT:    cmpb $-8, %cl
+; X64-NEXT:    movl $248, %eax
+; X64-NEXT:    cmovgl %ecx, %eax
 ; X64-NEXT:    movsbl %al, %eax
 ; X64-NEXT:    retq
   %a = mul i4 %y, %z
diff --git a/llvm/test/CodeGen/X86/uadd_sat.ll b/llvm/test/CodeGen/X86/uadd_sat.ll
--- a/llvm/test/CodeGen/X86/uadd_sat.ll
+++ b/llvm/test/CodeGen/X86/uadd_sat.ll
@@ -98,26 +98,21 @@
 ; X86-LABEL: func3:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT:    shlb $4, %cl
-; X86-NEXT:    shlb $4, %al
-; X86-NEXT:    addb %cl, %al
+; X86-NEXT:    addb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movzbl %al, %ecx
+; X86-NEXT:    cmpb $15, %al
+; X86-NEXT:    movl $15, %eax
+; X86-NEXT:    cmovbl %ecx, %eax
 ; X86-NEXT:    movzbl %al, %eax
-; X86-NEXT:    movl $255, %ecx
-; X86-NEXT:    cmovael %eax, %ecx
-; X86-NEXT:    shrb $4, %cl
-; X86-NEXT:    movzbl %cl, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: func3:
 ; X64:       # %bb.0:
-; X64-NEXT:    shlb $4, %sil
-; X64-NEXT:    shlb $4, %dil
 ; X64-NEXT:    addb %sil, %dil
 ; X64-NEXT:    movzbl %dil, %eax
-; X64-NEXT:    movl $255, %ecx
-; X64-NEXT:    cmovael %eax, %ecx
-; X64-NEXT:    shrb $4, %cl
+; X64-NEXT:    cmpb $15, %al
+; X64-NEXT:    movl $15, %ecx
+; X64-NEXT:    cmovbl %eax, %ecx
 ; X64-NEXT:    movzbl %cl, %eax
 ; X64-NEXT:    retq
   %tmp = call i4 @llvm.uadd.sat.i4(i4 %x, i4 %y)
diff --git a/llvm/test/CodeGen/X86/uadd_sat_plus.ll b/llvm/test/CodeGen/X86/uadd_sat_plus.ll
--- a/llvm/test/CodeGen/X86/uadd_sat_plus.ll
+++ b/llvm/test/CodeGen/X86/uadd_sat_plus.ll
@@ -108,17 +108,15 @@
 define zeroext i4 @func4(i4 zeroext %x, i4 zeroext %y, i4 zeroext %z) nounwind {
 ; X86-LABEL: func4:
 ; X86:       # %bb.0:
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    mulb {{[0-9]+}}(%esp)
-; X86-NEXT:    shlb $4, %al
-; X86-NEXT:    shlb $4, %cl
-; X86-NEXT:    addb %al, %cl
-; X86-NEXT:    movzbl %cl, %eax
-; X86-NEXT:    movl $255, %ecx
-; X86-NEXT:    cmovael %eax, %ecx
-; X86-NEXT:    shrb $4, %cl
-; X86-NEXT:    movzbl %cl, %eax
+; X86-NEXT:    andb $15, %al
+; X86-NEXT:    addb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movzbl %al, %ecx
+; X86-NEXT:    cmpb $15, %al
+; X86-NEXT:    movl $15, %eax
+; X86-NEXT:    cmovbl %ecx, %eax
+; X86-NEXT:    movzbl %al, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: func4:
@@ -126,13 +124,12 @@
 ; X64-NEXT:    movl %esi, %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    mulb %dl
-; X64-NEXT:    shlb $4, %al
-; X64-NEXT:    shlb $4, %dil
-; X64-NEXT:    addb %al, %dil
-; X64-NEXT:    movzbl %dil, %eax
-; X64-NEXT:    movl $255, %ecx
-; X64-NEXT:    cmovael %eax, %ecx
-; X64-NEXT:    shrb $4, %cl
+; X64-NEXT:    andb $15, %al
+; X64-NEXT:    addb %dil, %al
+; X64-NEXT:    movzbl %al, %eax
+; X64-NEXT:    cmpb $15, %al
+; X64-NEXT:    movl $15, %ecx
+; X64-NEXT:    cmovbl %eax, %ecx
 ; X64-NEXT:    movzbl %cl, %eax
 ; X64-NEXT:    retq
   %a = mul i4 %y, %z
diff --git a/llvm/test/CodeGen/X86/usub_sat.ll b/llvm/test/CodeGen/X86/usub_sat.ll
--- a/llvm/test/CodeGen/X86/usub_sat.ll
+++ b/llvm/test/CodeGen/X86/usub_sat.ll
@@ -97,28 +97,27 @@
 define zeroext i4 @func3(i4 zeroext %x, i4 zeroext %y) nounwind {
 ; X86-LABEL: func3:
 ; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT:    shlb $4, %cl
-; X86-NEXT:    shlb $4, %al
-; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    subb %cl, %al
-; X86-NEXT:    movzbl %al, %eax
-; X86-NEXT:    cmovbl %edx, %eax
-; X86-NEXT:    shrb $4, %al
-; X86-NEXT:    movzbl %al, %eax
+; X86-NEXT:    movzbl %cl, %edx
+; X86-NEXT:    movzbl %al, %ebx
+; X86-NEXT:    cmpb %al, %cl
+; X86-NEXT:    cmoval %edx, %ebx
+; X86-NEXT:    subb %al, %bl
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    andl $15, %eax
+; X86-NEXT:    popl %ebx
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: func3:
 ; X64:       # %bb.0:
-; X64-NEXT:    shlb $4, %sil
-; X64-NEXT:    shlb $4, %dil
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    subb %sil, %dil
-; X64-NEXT:    movzbl %dil, %ecx
-; X64-NEXT:    cmovbl %eax, %ecx
-; X64-NEXT:    shrb $4, %cl
-; X64-NEXT:    movzbl %cl, %eax
+; X64-NEXT:    cmpb %sil, %dil
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    cmoval %edi, %eax
+; X64-NEXT:    subb %sil, %al
+; X64-NEXT:    movzbl %al, %eax
+; X64-NEXT:    andl $15, %eax
 ; X64-NEXT:    retq
   %tmp = call i4 @llvm.usub.sat.i4(i4 %x, i4 %y)
   ret i4 %tmp
diff --git a/llvm/test/CodeGen/X86/usub_sat_plus.ll b/llvm/test/CodeGen/X86/usub_sat_plus.ll
--- a/llvm/test/CodeGen/X86/usub_sat_plus.ll
+++ b/llvm/test/CodeGen/X86/usub_sat_plus.ll
@@ -111,17 +111,22 @@
 define zeroext i4 @func4(i4 zeroext %x, i4 zeroext %y, i4 zeroext %z) nounwind {
 ; X86-LABEL: func4:
 ; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    mulb {{[0-9]+}}(%esp)
-; X86-NEXT:    shlb $4, %al
-; X86-NEXT:    shlb $4, %cl
-; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    subb %al, %cl
-; X86-NEXT:    movzbl %cl, %eax
-; X86-NEXT:    cmovbl %edx, %eax
-; X86-NEXT:    shrb $4, %al
-; X86-NEXT:    movzbl %al, %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andb $15, %dl
+; X86-NEXT:    movzbl %dl, %esi
+; X86-NEXT:    movzbl %cl, %ebx
+; X86-NEXT:    cmpb %dl, %cl
+; X86-NEXT:    cmovbel %esi, %ebx
+; X86-NEXT:    subb %al, %bl
+; X86-NEXT:    movzbl %bl, %eax
+; X86-NEXT:    andl $15, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %ebx
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: func4:
@@ -129,14 +134,14 @@
 ; X64-NEXT:    movl %esi, %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    mulb %dl
-; X64-NEXT:    shlb $4, %al
-; X64-NEXT:    shlb $4, %dil
-; X64-NEXT:    xorl %ecx, %ecx
-; X64-NEXT:    subb %al, %dil
-; X64-NEXT:    movzbl %dil, %eax
-; X64-NEXT:    cmovbl %ecx, %eax
-; X64-NEXT:    shrb $4, %al
-; X64-NEXT:    movzbl %al, %eax
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    andb $15, %cl
+; X64-NEXT:    movzbl %cl, %ecx
+; X64-NEXT:    cmpb %cl, %dil
+; X64-NEXT:    cmoval %edi, %ecx
+; X64-NEXT:    subb %al, %cl
+; X64-NEXT:    movzbl %cl, %eax
+; X64-NEXT:    andl $15, %eax
 ; X64-NEXT:    retq
   %a = mul i4 %y, %z
   %tmp = call i4 @llvm.usub.sat.i4(i4 %x, i4 %a)