Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -2733,6 +2733,13 @@
   BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
 defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
 
+def : Pat<(v2f32 (fabs (fsub V64:$Rn, V64:$Rm))),
+          (FABDv2f32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4f32 (fabs (fsub V128:$Rn, V128:$Rm))),
+          (FABDv4f32 V128:$Rn, V128:$Rm)>;
+def : Pat<(v2f64 (fabs (fsub V128:$Rn, V128:$Rm))),
+          (FABDv2f64 V128:$Rn, V128:$Rm)>;
+
 def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
           (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
 def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
@@ -3022,6 +3029,11 @@
 defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
                                      int_aarch64_neon_usqadd>;
 
+def : Pat<(f32 (fabs (fsub FPR32:$Rn, FPR32:$Rm))),
+          (FABD32 FPR32:$Rn, FPR32:$Rm)>;
+def : Pat<(f64 (fabs (fsub FPR64:$Rn, FPR64:$Rm))),
+          (FABD64 FPR64:$Rn, FPR64:$Rm)>;
+
 def : Pat<(AArch64neg (v1i64 V64:$Rn)), (NEGv1i64 V64:$Rn)>;
 
 def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
Index: test/CodeGen/AArch64/arm64-fabd.ll
===================================================================
--- test/CodeGen/AArch64/arm64-fabd.ll
+++ test/CodeGen/AArch64/arm64-fabd.ll
@@ -0,0 +1,79 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-none-linux-gnu"
+
+@a = common global [4 x float] zeroinitializer
+@b = common global [4 x float] zeroinitializer
+@c = common global [4 x float] zeroinitializer
+; CHECK: test_v4f32
+; CHECK: fabd
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
+define void @test_v4f32() {
+  %1 = load <4 x float>* bitcast ([4 x float]* @b to <4 x float>*)
+  %2 = load <4 x float>* bitcast ([4 x float]* @c to <4 x float>*)
+  %3 = fsub <4 x float> %1, %2
+  %4 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %3)
+  store <4 x float> %4, <4 x float>* bitcast ([4 x float]* @a to <4 x float>*)
+  ret void
+}
+
+@d = common global [2 x float] zeroinitializer
+@e = common global [2 x float] zeroinitializer
+@f = common global [2 x float] zeroinitializer
+; CHECK: test_v2f32
+; CHECK: fabd
+declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
+define void @test_v2f32() {
+  %1 = load <2 x float>* bitcast ([2 x float]* @e to <2 x float>*)
+  %2 = load <2 x float>* bitcast ([2 x float]* @f to <2 x float>*)
+  %3 = fsub <2 x float> %1, %2
+  %4 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %3)
+  store <2 x float> %4, <2 x float>* bitcast ([2 x float]* @d to <2 x float>*)
+  ret void
+}
+
+@g = common global [2 x double] zeroinitializer
+@h = common global [2 x double] zeroinitializer
+@i = common global [2 x double] zeroinitializer
+; CHECK: test_v2f64
+; CHECK: fabd
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
+define void @test_v2f64() {
+  %1 = load <2 x double>* bitcast ([2 x double]* @g to <2 x double>*)
+  %2 = load <2 x double>* bitcast ([2 x double]* @h to <2 x double>*)
+  %3 = fsub <2 x double> %1, %2
+  %4 = call <2 x double> @llvm.fabs.v2f64(<2 x double> %3)
+  store <2 x double> %4, <2 x double>* bitcast ([2 x double]* @i to <2 x double>*)
+  ret void
+}
+
+@j = common global float 0.000000e+00
+@k = common global float 0.000000e+00
+@l = common global float 0.000000e+00
+; CHECK: test_fabd32
+; CHECK: fabd
+declare float @fabsf(float)
+define void @test_fabd32() {
+  %1 = load float* @j
+  %2 = load float* @k
+  %3 = fsub float %1, %2
+  %fabsf = tail call float @fabsf(float %3)
+  store float %fabsf, float* @l
+  ret void
+}
+
+@n = common global double 0.000000e+00
+@o = common global double 0.000000e+00
+@m = common global double 0.000000e+00
+; CHECK: test_fabd64
+; CHECK: fabd
+declare double @fabs(double)
+define void @test_fabd64() {
+  %1 = load double* @n
+  %2 = load double* @o
+  %3 = fsub double %1, %2
+  %4 = tail call double @fabs(double %3)
+  store double %4, double* @m
+  ret void
+}
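For reference, a minimal sketch (not part of the patch) of the IR shape the new scalar f32 pattern matches: an fsub whose result feeds fabs. With the patterns above applied, llc -mtriple=aarch64-none-linux-gnu on this input should select a single fabd rather than an fsub/fabs pair; the function name @fabs_of_fsub and the file name fabd-sketch.ll are illustrative only.

; Sketch only; verify with:
;   llc -mtriple=aarch64-none-linux-gnu -o - fabd-sketch.ll
; Expected lowering under AAPCS: fabd s0, s0, s1
declare float @llvm.fabs.f32(float)

define float @fabs_of_fsub(float %a, float %b) {
  %diff = fsub float %a, %b
  %res = call float @llvm.fabs.f32(float %diff)
  ret float %res
}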