diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -220,6 +220,8 @@
   // Vector halving addition
   SHADD,
   UHADD,
+  // Absolute difference
+  UABD,
 
   // Vector rounding halving addition
   SRHADD,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -759,6 +759,7 @@
   // Vector add and sub nodes may conceal a high-half opportunity.
   // Also, try to fold ADD into CSINC/CSINV..
   setTargetDAGCombine(ISD::ADD);
+  setTargetDAGCombine(ISD::ABS);
   setTargetDAGCombine(ISD::SUB);
   setTargetDAGCombine(ISD::SRL);
   setTargetDAGCombine(ISD::XOR);
@@ -1821,6 +1822,7 @@
     MAKE_CASE(AArch64ISD::STNP)
     MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
     MAKE_CASE(AArch64ISD::INDEX_VECTOR)
+    MAKE_CASE(AArch64ISD::UABD)
   }
 #undef MAKE_CASE
   return nullptr;
@@ -3650,6 +3652,10 @@
     return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
                        Op.getOperand(2));
   }
+  case Intrinsic::aarch64_neon_uabd: {
+    return DAG.getNode(AArch64ISD::UABD, dl, Op.getValueType(),
+                       Op.getOperand(1), Op.getOperand(2));
+  }
   }
 }
 
@@ -11033,6 +11039,47 @@
   return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
 }
 
+// Given a ABS node, detect the following pattern:
+// (ABS (SUB (ZERO_EXTEND a), (ZERO_EXTEND b))).
+// This is useful as it is the input into a SAD pattern.
+static bool detectZextAbsDiff(const SDValue &Abs, SDValue &Op0, SDValue &Op1) {
+  SDValue AbsOp1 = Abs->getOperand(0);
+  if (AbsOp1.getOpcode() != ISD::SUB)
+    return false;
+
+  Op0 = AbsOp1.getOperand(0);
+  Op1 = AbsOp1.getOperand(1);
+
+  // Check if the operands of the sub are zero-extended from vectors of i8.
+  if (Op0.getOpcode() != ISD::ZERO_EXTEND ||
+      Op0.getOperand(0).getValueType().getVectorElementType() != MVT::i8 ||
+      Op1.getOpcode() != ISD::ZERO_EXTEND ||
+      Op1.getOperand(0).getValueType().getVectorElementType() != MVT::i8)
+    return false;
+
+  return true;
+}
+
+// Detect and combine pattern for unsigned absolute difference of i8 types
+// Generates UABD instruction.
+static SDValue performABSCombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const AArch64Subtarget *Subtarget) {
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::v16i32 && VT != MVT::v16i16)
+    return SDValue();
+
+  SDValue Op0, Op1;
+  if (!detectZextAbsDiff(SDValue(N, 0), Op0, Op1))
+    return SDValue();
+
+  Op0 = Op0.getOperand(0);
+  Op1 = Op1.getOperand(0);
+  SDValue ABD =
+      DAG.getNode(AArch64ISD::UABD, SDLoc(N), Op0->getValueType(0), Op0, Op1);
+  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, ABD);
+}
+
 static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const AArch64Subtarget *Subtarget) {
@@ -14655,6 +14702,8 @@
   default:
     LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");
     break;
+  case ISD::ABS:
+    return performABSCombine(N, DAG, DCI, Subtarget);
   case ISD::ADD:
   case ISD::SUB:
     return performAddSubLongCombine(N, DCI, DAG);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -559,6 +559,8 @@
 def AArch64shadd   : SDNode<"AArch64ISD::SHADD", SDT_AArch64binvec>;
 def AArch64uhadd   : SDNode<"AArch64ISD::UHADD", SDT_AArch64binvec>;
 
+def AArch64uabd   : SDNode<"AArch64ISD::UABD", SDT_AArch64binvec>;
+
 def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
 def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
@@ -3812,7 +3814,7 @@
 //===----------------------------------------------------------------------===//
 
 defm UABDL   : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
-                                          int_aarch64_neon_uabd>;
+                                          AArch64uabd>;
 // Match UABDL in log2-shuffle patterns.
 def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)),
                            (zext (v8i8 V64:$opB))))),
@@ -4101,8 +4103,8 @@
 defm SSHL     : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
 defm SUB      : SIMDThreeSameVector<1,0b10000,"sub", sub>;
 defm UABA     : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
-      TriOpFrag<(add node:$LHS, (int_aarch64_neon_uabd node:$MHS, node:$RHS))> >;
-defm UABD     : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>;
+      TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >;
+defm UABD     : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>;
 defm UHADD    : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", AArch64uhadd>;
 defm UHSUB    : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
 defm UMAXP    : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
@@ -4699,7 +4701,7 @@
 defm SSUBW   : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
                  BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
 defm UABAL   : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
-                                              int_aarch64_neon_uabd>;
+                                              AArch64uabd>;
 defm UADDL   : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
                  BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>;
 defm UADDW   : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -142,11 +142,11 @@
 }
 
 declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>)
+declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>)
 
-define i16 @uabdl8h_rdx(<16 x i8>* %a, <16 x i8>* %b) {
-; CHECK-LABEL: uabdl8h_rdx
-; CHECK: uabdl2.8h
-; CHECK: uabdl.8h
+define i16 @uabd16b_rdx(<16 x i8>* %a, <16 x i8>* %b) {
+; CHECK-LABEL: uabd16b_rdx
+; CHECK: uabd.16b
   %aload = load <16 x i8>, <16 x i8>* %a, align 1
   %bload = load <16 x i8>, <16 x i8>* %b, align 1
   %aext = zext <16 x i8> %aload to <16 x i16>
@@ -159,6 +159,21 @@
   ret i16 %reduced_v
 }
 
+define i32 @uabd16b_rdx_i32(<16 x i8>* %a, <16 x i8>* %b) {
+; CHECK-LABEL: uabd16b_rdx_i32
+; CHECK: uabd.16b
+  %aload = load <16 x i8>, <16 x i8>* %a, align 1
+  %bload = load <16 x i8>, <16 x i8>* %b, align 1
+  %aext = zext <16 x i8> %aload to <16 x i32>
+  %bext = zext <16 x i8> %bload to <16 x i32>
+  %abdiff = sub nsw <16 x i32> %aext, %bext
+  %abcmp = icmp slt <16 x i32> %abdiff, zeroinitializer
+  %ababs = sub nsw <16 x i32> zeroinitializer, %abdiff
+  %absel = select <16 x i1> %abcmp, <16 x i32> %ababs, <16 x i32> %abdiff
+  %reduced_v = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %absel)
+  ret i32 %reduced_v
+}
+
 declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>)
 
 define i32 @uabdl4s_rdx(<8 x i16>* %a, <8 x i16>* %b) {