diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -15411,6 +15411,55 @@
                      CSel.getOperand(3));
 }
 
+// Return the negative of node N in the DAG if it already exists.
+static SDValue simplifyToNegative(SDValue N, SelectionDAG &DAG) {
+  // t2: v8i16 = AArch64ISD::CMHI t0, t1
+  // t3: v8i16 = BUILD_VECTOR Constant:i32<1>, ...
+  // t4: v8i16 = and t2, t3
+  if (N->getOpcode() == ISD::AND) {
+    SDValue LHS = N->getOperand(0);
+    SDValue RHS = N->getOperand(1);
+    if (LHS.getOpcode() == AArch64ISD::CMHI) {
+      KnownBits Known;
+      unsigned Depth = 0;
+      Known = DAG.computeKnownBits(RHS, Depth);
+      if (Known.isConstant() && Known.getConstant().isOne())
+        return LHS;
+    }
+    if (RHS.getOpcode() == AArch64ISD::CMHI) {
+      KnownBits Known;
+      unsigned Depth = 0;
+      Known = DAG.computeKnownBits(LHS, Depth);
+      if (Known.isConstant() && Known.getConstant().isOne())
+        return RHS;
+    }
+  }
+  return SDValue();
+}
+
+// Try to fold (add (-a) b) or (add a (-b)) -> (sub b a) or (sub a b)
+// in case it is profitable to do so.
+static SDValue performAddNegCombine(SDNode *N, SelectionDAG &DAG) {
+  if (N->getOpcode() != ISD::ADD)
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+
+  // Replace with a subtraction if LHS has only one use and we can find its
+  // negative in the DAG. This way, we can get rid of LHS.
+  if (LHS.hasOneUse())
+    if (SDValue NE = simplifyToNegative(LHS, DAG))
+      return DAG.getNode(ISD::SUB, SDLoc(N), VT, RHS, NE);
+
+  if (RHS.hasOneUse())
+    if (SDValue NE = simplifyToNegative(RHS, DAG))
+      return DAG.getNode(ISD::SUB, SDLoc(N), VT, LHS, NE);
+
+  return SDValue();
+}
+
 // The basic add/sub long vector instructions have variants with "2" on the end
 // which act on the high-half of their inputs. They are normally matched by
 // patterns like:
@@ -15541,6 +15590,8 @@
     return Val;
   if (SDValue Val = performNegCSelCombine(N, DAG))
     return Val;
+  if (SDValue Val = performAddNegCombine(N, DAG))
+    return Val;
 
   return performAddSubLongCombine(N, DCI, DAG);
 }
diff --git a/llvm/test/CodeGen/AArch64/add-negative.ll b/llvm/test/CodeGen/AArch64/add-negative.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/add-negative.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s --check-prefixes=CHECK
+
+define <8 x i16> @add_to_sub(<8 x i16> %0, <8 x i16> %1) {
+; CHECK-LABEL: add_to_sub:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI0_0
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI0_0]
+; CHECK-NEXT:    cmhi v0.8h, v2.8h, v0.8h
+; CHECK-NEXT:    cmhi v1.8h, v2.8h, v1.8h
+; CHECK-NEXT:    sub v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %3 = icmp ult <8 x i16> %0, 
+  %4 = sext <8 x i1> %3 to <8 x i16>
+  %5 = icmp ult <8 x i16> %1, 
+  %6 = zext <8 x i1> %5 to <8 x i16>
+  %7 = add nsw <8 x i16> %6, %4
+  ret <8 x i16> %7
+}
+
diff --git a/llvm/test/CodeGen/AArch64/minmax.ll b/llvm/test/CodeGen/AArch64/minmax.ll
--- a/llvm/test/CodeGen/AArch64/minmax.ll
+++ b/llvm/test/CodeGen/AArch64/minmax.ll
@@ -123,10 +123,8 @@
 ; CHECK-LABEL: t12:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmhi v2.16b, v1.16b, v0.16b
-; CHECK-NEXT:    movi v3.16b, #1
 ; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-NEXT:    and v1.16b, v2.16b, v3.16b
-; CHECK-NEXT:    add v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    sub v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    ret
   %t1 = icmp ugt <16 x i8> %b, %a
   %t2 = select <16 x i1> %t1, <16 x i8> %a, <16 x i8> %b
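
The fold is sound because each lane of an AArch64ISD::CMHI result is either all-ones or zero, so masking the compare result with 1 yields exactly the arithmetic negation of that lane, and add(x, and(cmhi, 1)) therefore equals sub(x, cmhi). The standalone C++ sketch below (illustrative only, not part of the patch; the variable names are made up) checks that lane-wise identity on i16 values:

// Sketch: verify and(cmhi, 1) == -cmhi for the two values a CMHI lane can
// take, which is what lets (add x (and cmhi 1)) be rewritten as (sub x cmhi).
#include <cassert>
#include <cstdint>

int main() {
  for (int16_t CmhiLane : {int16_t(-1), int16_t(0)}) {  // all-ones or zero
    int16_t Masked = CmhiLane & 1;                       // and(cmhi, 1)
    assert(Masked == static_cast<int16_t>(-CmhiLane));   // equals -cmhi
    int16_t X = 42;                                      // arbitrary lane value
    assert(static_cast<int16_t>(X + Masked) ==
           static_cast<int16_t>(X - CmhiLane));          // add -> sub rewrite
  }
  return 0;
}

With the patch applied, running the RUN line from add-negative.ll (llc -verify-machineinstrs -o - -mtriple=aarch64-linux-gnu llvm/test/CodeGen/AArch64/add-negative.ll) should emit the two cmhi instructions followed by a single sub, as in the CHECK lines above.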