Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -2733,6 +2733,13 @@
                                   BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
 defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
 
+def : Pat<(v2f32 (fabs (fsub V64:$Rn, V64:$Rm))),   // v2f32: fabs(fsub Rn, Rm)
+          (FABDv2f32 V64:$Rn, V64:$Rm)>;            // is selected as FABDv2f32
+def : Pat<(v4f32 (fabs (fsub V128:$Rn, V128:$Rm))), // v4f32: fabs(fsub Rn, Rm)
+          (FABDv4f32 V128:$Rn, V128:$Rm)>;          // is selected as FABDv4f32
+def : Pat<(v2f64 (fabs (fsub V128:$Rn, V128:$Rm))), // v2f64: fabs(fsub Rn, Rm)
+          (FABDv2f64 V128:$Rn, V128:$Rm)>;          // is selected as FABDv2f64
+
 def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
           (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
 def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
@@ -3022,6 +3029,16 @@
 defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
                                     int_aarch64_neon_usqadd>;
 
+// Select the scalar FABD for fabs(fsub Rn, Rm). FABD32/FABD64 are Advanced
+// SIMD scalar instructions, while f32/f64 fabs and fsub are legal with FP
+// alone, so these patterns must only fire on subtargets that have NEON.
+let Predicates = [HasNEON] in {
+def : Pat<(f32 (fabs (fsub FPR32:$Rn, FPR32:$Rm))),
+          (FABD32 FPR32:$Rn, FPR32:$Rm)>;
+def : Pat<(f64 (fabs (fsub FPR64:$Rn, FPR64:$Rm))),
+          (FABD64 FPR64:$Rn, FPR64:$Rm)>;
+}
+
 def : Pat<(AArch64neg (v1i64 V64:$Rn)), (NEGv1i64 V64:$Rn)>;
 
 def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
Index: test/CodeGen/AArch64/arm64-neon-simd-vabs.ll
===================================================================
--- test/CodeGen/AArch64/arm64-neon-simd-vabs.ll
+++ test/CodeGen/AArch64/arm64-neon-simd-vabs.ll
@@ -0,0 +1,81 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" ; layout string the module is compiled with
+target triple = "aarch64-none-linux-gnu" ; same triple as -mtriple on the RUN line
+
+@a = common global [4 x float] zeroinitializer ; receives the result
+@b = common global [4 x float] zeroinitializer ; first fsub operand
+@c = common global [4 x float] zeroinitializer ; second fsub operand
+; CHECK-LABEL: test_v4f32:
+; CHECK: fabd	{{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
+define void @test_v4f32(){ ; a = fabs(b - c) on <4 x float>, expected to become one fabd
+  %1 = load <4 x float>* bitcast ([4 x float]* @b to <4 x float>*)
+  %2 = load <4 x float>* bitcast ([4 x float]* @c to <4 x float>*)
+  %3 = fsub <4 x float> %1, %2
+  %4 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %3)
+  store <4 x float> %4, <4 x float>* bitcast ([4 x float]* @a to <4 x float>*)
+  ret void
+}
+
+@d = common global [2 x float] zeroinitializer ; receives the result
+@e = common global [2 x float] zeroinitializer ; first fsub operand
+@f = common global [2 x float] zeroinitializer ; second fsub operand
+; CHECK-LABEL: test_v2f32:
+; CHECK: fabd	{{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
+define void @test_v2f32(){ ; d = fabs(e - f) on <2 x float>, expected to become one fabd
+  %1 = load <2 x float>* bitcast ([2 x float]* @e to <2 x float>*)
+  %2 = load <2 x float>* bitcast ([2 x float]* @f to <2 x float>*)
+  %3 = fsub <2 x float> %1, %2
+  %4 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %3)
+  store <2 x float> %4, <2 x float>* bitcast ([2 x float]* @d to <2 x float>*)
+  ret void
+}
+
+@g = common global [2 x double] zeroinitializer ; first fsub operand
+@h = common global [2 x double] zeroinitializer ; second fsub operand
+@i = common global [2 x double] zeroinitializer ; receives the result
+; CHECK-LABEL: test_v2f64:
+; CHECK: fabd	{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
+define void @test_v2f64(){ ; i = fabs(g - h) on <2 x double>, expected to become one fabd
+  %1 = load <2 x double>* bitcast ([2 x double]* @g to <2 x double>*)
+  %2 = load <2 x double>* bitcast ([2 x double]* @h to <2 x double>*)
+  %3 = fsub <2 x double> %1, %2
+  %4 = call <2 x double> @llvm.fabs.v2f64(<2 x double> %3)
+  store <2 x double> %4, <2 x double>* bitcast ([2 x double]* @i to <2 x double>*)
+  ret void
+}
+
+@j = common global float 0.000000e+00 ; first fsub operand
+@k = common global float 0.000000e+00 ; second fsub operand
+@l = common global float 0.000000e+00 ; receives the result
+; CHECK-LABEL: test_fabd32:
+; CHECK: fabd	{{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+declare float @fabsf(float)
+define void @test_fabd32(){ ; l = fabsf(j - k), expected to become one scalar fabd
+  %1 = load float* @j
+  %2 = load float* @k
+  %3 = fsub float %1, %2
+  %fabsf = tail call float @fabsf(float %3) #0
+  store float %fabsf, float* @l
+  ret void
+}
+
+@n = common global double 0.000000e+00 ; first fsub operand
+@o = common global double 0.000000e+00 ; second fsub operand
+@m = common global double 0.000000e+00 ; receives the result
+; CHECK-LABEL: test_fabd64:
+; CHECK: fabd	{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+declare double @fabs(double)
+define void @test_fabd64() { ; m = fabs(n - o), expected to become one scalar fabd
+  %1 = load double* @n
+  %2 = load double* @o
+  %3 = fsub double %1, %2
+  %4 = tail call double @fabs(double %3) #0
+  store double %4, double* @m
+  ret void
+}
+
+attributes #0 = { nounwind readnone } ; attached to the fabsf/fabs call sites in the scalar tests
+