Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2544,6 +2544,21 @@
   if (N1.isUndef())
     return N1;
 
+  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
+  // XOR is commutative, so accept the sign mask Y as either XOR operand.
+  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
+    if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
+      SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
+      SDValue S0 = N1.getOperand(0);
+      if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
+        unsigned OpSizeInBits = VT.getScalarSizeInBits();
+        if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
+          if (C->getAPIntValue() == (OpSizeInBits - 1))
+            return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
+      }
+    }
+  }
+
   // If the relocation model supports it, consider symbol offsets.
   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
@@ -5649,13 +5664,19 @@
   }
 
   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
-  unsigned OpSizeInBits = VT.getScalarSizeInBits();
-  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
-      N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
-      TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
-    if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
-      if (C->getAPIntValue() == (OpSizeInBits - 1))
-        return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
+  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
+    // XOR commutes: pick out the ADD and the sign-mask SRA from either side.
+    bool AddIsLHS = N0.getOpcode() == ISD::ADD;
+    SDValue Sum = AddIsLHS ? N0 : N1, Sign = AddIsLHS ? N1 : N0;
+    if (Sum.getOpcode() == ISD::ADD && Sign.getOpcode() == ISD::SRA) {
+      SDValue X = Sign.getOperand(0);
+      bool MaskFirst = Sum.getOperand(0) == Sign && Sum.getOperand(1) == X;
+      bool MaskSecond = Sum.getOperand(1) == Sign && Sum.getOperand(0) == X;
+      ConstantSDNode *ShAmt = isConstOrConstSplat(Sign.getOperand(1));
+      if ((MaskFirst || MaskSecond) && ShAmt &&
+          ShAmt->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
+        return DAG.getNode(ISD::ABS, SDLoc(N), VT, X);
+    }
   }
 
   // fold (xor x, x) -> 0
Index: lib/Target/ARM/ARMInstrNEON.td
===================================================================
--- lib/Target/ARM/ARMInstrNEON.td
+++ lib/Target/ARM/ARMInstrNEON.td
@@ -5391,6 +5391,16 @@
                                          (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))),
           (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
 
+// Select the same VABDLu instructions when the absolute value of the widened
+// difference reaches selection as a single abs node:
+//   abs (sub (zext a), (zext b)) -> VABDLu a, b
+def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))),
+          (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
+def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
+          (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
+def : Pat<(v2i64 (abs (sub (zext (v2i32 DPR:$opA)), (zext (v2i32 DPR:$opB))))),
+          (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
+
 //   VABA     : Vector Absolute Difference and Accumulate
 defm VABAs    : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                              "vaba", "s", int_arm_neon_vabds, add>;
Index: lib/Target/Hexagon/HexagonISelLowering.cpp
===================================================================
--- lib/Target/Hexagon/HexagonISelLowering.cpp
+++ lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1327,6 +1327,11 @@
     setMinimumJumpTableEntries(std::numeric_limits<int>::max());
   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
 
+  // Mark scalar ABS as legal so the generic combines that form ISD::ABS
+  // from the sra/add/xor and sra/xor/sub idioms can fire for these types.
+  for (MVT Ty : {MVT::i32, MVT::i64})
+    setOperationAction(ISD::ABS, Ty, Legal);
+
   // Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
   // but they only operate on i64.
   for (MVT VT : MVT::integer_valuetypes()) {
Index: lib/Target/Hexagon/HexagonPatterns.td
===================================================================
--- lib/Target/Hexagon/HexagonPatterns.td
+++ lib/Target/Hexagon/HexagonPatterns.td
@@ -1175,22 +1175,6 @@
                      (i32 (LoReg $Rs)))>;
 }
 
-let AddedComplexity = 50 in
-multiclass Abs_pat<InstHexagon MI, PatFrag RsPred, int Sh> {
-  // Let y = x >> 31 (for 32-bit), i.e. the sign bit repeated.
-  // abs(x) = (x + y) ^ y
-  def: Pat<(xor (add (sra RsPred:$Rs, (i32 Sh)), RsPred:$Rs),
-                (sra RsPred:$Rs, (i32 Sh))),
-           (MI RsPred:$Rs)>;
-  // abs(x) = (x ^ y) - y
-  def: Pat<(sub (xor RsPred:$Rs, (sra RsPred:$Rs, (i32 Sh))),
-                (sra RsPred:$Rs, (i32 Sh))),
-           (MI RsPred:$Rs)>;
-}
-
-defm: Abs_pat<A2_abs,  I32, 31>;
-defm: Abs_pat<A2_absp, I64, 63>;
-
 def: Pat<(add I32:$Rs, anyimm:$s16),   (A2_addi   I32:$Rs,  imm:$s16)>;
 def: Pat<(or  I32:$Rs, anyimm:$s10),   (A2_orir   I32:$Rs,  imm:$s10)>;
 def: Pat<(and I32:$Rs, anyimm:$s10),   (A2_andir  I32:$Rs,  imm:$s10)>;
Index: test/CodeGen/Hexagon/abs.ll
===================================================================
--- test/CodeGen/Hexagon/abs.ll
+++ test/CodeGen/Hexagon/abs.ll
@@ -54,4 +54,37 @@
   ret i64 %v2
 }
 
-attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" }
+; f6: the abs idiom with the sign mask as the first xor operand and the
+; second add operand: xor (sra a0, 31), (add a0, (sra a0, 31)).
+; %v0 has a further use in %v4, so the asr is still expected in the output.
+; CHECK-LABEL: f6:
+; CHECK: r[[R60:[0-9]+]] = abs(r0)
+; CHECK: r[[R61:[0-9]+]] = asr(r0,#31)
+; CHECK: r0 = addasl(r[[R61]],r[[R60]],#1)
+define i32 @f6(i32 %a0) #0 {
+  %v0 = ashr i32 %a0, 31
+  %v1 = add i32 %a0, %v0
+  %v2 = xor i32 %v0, %v1
+  %v3 = mul i32 %v2, 2
+  %v4 = add i32 %v0, %v3
+  ret i32 %v4
+}
+
+; f7: same as f6, but with the sign mask as the first add operand
+; (add (sra a0, 31), a0) and the doubling written as a shift.
+; CHECK-LABEL: f7:
+; CHECK: r[[R70:[0-9]+]] = abs(r0)
+; CHECK: r[[R71:[0-9]+]] = asr(r0,#31)
+; CHECK: r0 = addasl(r[[R71]],r[[R70]],#1)
+define i32 @f7(i32 %a0) #0 {
+  %v0 = ashr i32 %a0, 31
+  %v1 = add i32 %v0, %a0
+  %v2 = xor i32 %v0, %v1
+  %v3 = shl i32 %v2, 1
+  %v4 = add i32 %v0, %v3
+  ret i32 %v4
+}
+
+; NOTE(review): "-packets" presumably keeps the instructions unpacketized so
+; the checks above see a fixed instruction order -- confirm.
+attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="-packets" }