Index: lib/Target/AArch64/AArch64InstrNEON.td
===================================================================
--- lib/Target/AArch64/AArch64InstrNEON.td
+++ lib/Target/AArch64/AArch64InstrNEON.td
@@ -5788,6 +5788,40 @@
 defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>;
 defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>;
 
+// Scalar general arithmetic operation
+// Select scalar FPR64 instructions for operations on the one-element vector
+// type v1f64. The 2D, 3D and 4D pattern classes match operators with one,
+// two and three v1f64 source operands respectively.
+class Neon_Scalar_GeneralMath2D_pattern<SDPatternOperator opnode,
+                                        Instruction INST>
+    : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
+
+class Neon_Scalar_GeneralMath3D_pattern<SDPatternOperator opnode,
+                                        Instruction INST>
+    : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+          (INST FPR64:$Rn, FPR64:$Rm)>;
+
+class Neon_Scalar_GeneralMath4D_pattern<SDPatternOperator opnode,
+                                        Instruction INST>
+    : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm),
+              (v1f64 FPR64:$Ra))),
+          (INST FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
+
+def : Neon_Scalar_GeneralMath3D_pattern<fadd, FADDddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<fmul, FMULddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<fsub, FSUBddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<fdiv, FDIVddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vabds, FABDddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmaxs, FMAXddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmins, FMINddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vmaxnm, FMAXNMddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vminnm, FMINNMddd>;
+
+def : Neon_Scalar_GeneralMath2D_pattern<fabs, FABSdd>;
+def : Neon_Scalar_GeneralMath2D_pattern<fneg, FNEGdd>;
+
+def : Neon_Scalar_GeneralMath4D_pattern<fma, FMADDdddd>;
+def : Neon_Scalar_GeneralMath4D_pattern<fmsub, FMSUBdddd>;
 
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
Index: test/CodeGen/AArch64/neon-add-sub.ll
===================================================================
--- test/CodeGen/AArch64/neon-add-sub.ll
+++ test/CodeGen/AArch64/neon-add-sub.ll
@@ -118,3 +118,120 @@
 	ret <2 x double> %tmp3
 }
 
+define <1 x double> @test_vadd_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vadd_f64
+; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fadd <1 x double> %a, %b
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vmul_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmul_f64
+; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fmul <1 x double> %a, %b
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vdiv_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vdiv_f64
+; CHECK: fdiv d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fdiv <1 x double> %a, %b
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vmla_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vmla_f64
+; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fmul <1 x double> %b, %c
+  %2 = fadd <1 x double> %1, %a
+  ret <1 x double> %2
+}
+
+define <1 x double> @test_vmls_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vmls_f64
+; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fmul <1 x double> %b, %c
+  %2 = fsub <1 x double> %a, %1
+  ret <1 x double> %2
+}
+
+define <1 x double> @test_vfms_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vfms_f64
+; CHECK: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fsub <1 x double> <double -0.000000e+00>, %b
+  %2 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %1, <1 x double> %c, <1 x double> %a)
+  ret <1 x double> %2
+}
+
+define <1 x double> @test_vfma_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vfma_f64
+; CHECK: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vsub_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vsub_f64
+; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fsub <1 x double> %a, %b
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vabd_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vabd_f64
+; CHECK: fabd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.arm.neon.vabds.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vmax_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmax_f64
+; CHECK: fmax d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.arm.neon.vmaxs.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vmin_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmin_f64
+; CHECK: fmin d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.arm.neon.vmins.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vmaxnm_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmaxnm_f64
+; CHECK: fmaxnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.aarch64.neon.vmaxnm.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vminnm_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vminnm_f64
+; CHECK: fminnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.aarch64.neon.vminnm.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vabs_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vabs_f64
+; CHECK: fabs d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.fabs.v1f64(<1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vneg_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vneg_f64
+; CHECK: fneg d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fsub <1 x double> <double -0.000000e+00>, %a
+  ret <1 x double> %1
+}
+
+declare <1 x double> @llvm.fabs.v1f64(<1 x double>)
+declare <1 x double> @llvm.aarch64.neon.vminnm.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.aarch64.neon.vmaxnm.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.arm.neon.vmins.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.arm.neon.vmaxs.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.arm.neon.vabds.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.fma.v1f64(<1 x double>, <1 x double>, <1 x double>)
Index: tools/clang/include/clang/Basic/arm_neon.td
===================================================================
--- tools/clang/include/clang/Basic/arm_neon.td
+++ tools/clang/include/clang/Basic/arm_neon.td
@@ -595,20 +595,22 @@
 
 ////////////////////////////////////////////////////////////////////////////////
 // Addition
-// With additional Qd type.
-def ADD : IOpInst<"vadd", "ddd", "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd", OP_ADD>;
+// With additional d, Qd type.
+def ADD : IOpInst<"vadd", "ddd", "csilfdUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd",
+                  OP_ADD>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Subtraction
-// With additional Qd type.
-def SUB : IOpInst<"vsub", "ddd", "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd", OP_SUB>;
+// With additional d, Qd type.
+def SUB : IOpInst<"vsub", "ddd", "csilfdUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd",
+                  OP_SUB>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Multiplication
-// With additional Qd type.
+// With additional d, Qd type.
-def MUL : IOpInst<"vmul", "ddd", "csifUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MUL>;
-def MLA : IOpInst<"vmla", "dddd", "csifUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLA>;
-def MLS : IOpInst<"vmls", "dddd", "csifUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLS>;
+def MUL : IOpInst<"vmul", "ddd", "csifdUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MUL>;
+def MLA : IOpInst<"vmla", "dddd", "csifdUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLA>;
+def MLS : IOpInst<"vmls", "dddd", "csifdUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLS>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Multiplication Extended
@@ -616,13 +618,13 @@
 
 ////////////////////////////////////////////////////////////////////////////////
 // Division
-def FDIV : IOpInst<"vdiv", "ddd", "fQfQd", OP_DIV>;
+def FDIV : IOpInst<"vdiv", "ddd", "fdQfQd", OP_DIV>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Vector fused multiply-add operations
-// With additional Qd type.
-def FMLA : SInst<"vfma", "dddd", "fQfQd">;
-def FMLS : SInst<"vfms", "dddd", "fQfQd">;
+// With additional d, Qd type.
+def FMLA : SInst<"vfma", "dddd", "fdQfQd">;
+def FMLS : SInst<"vfms", "dddd", "fdQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Logical operations
@@ -632,15 +634,15 @@
 
 ////////////////////////////////////////////////////////////////////////////////
 // Absolute Difference
-// With additional Qd type.
-def ABD : SInst<"vabd", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
+// With additional d, Qd type.
+def ABD : SInst<"vabd", "ddd", "csiUcUsUifdQcQsQiQUcQUsQUiQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // saturating absolute/negate
-// With additional Qd/Ql type.
+// With additional d/Qd/Ql type.
-def ABS : SInst<"vabs", "dd", "csifQcQsQiQfQlQd">;
+def ABS : SInst<"vabs", "dd", "csifdQcQsQiQfQlQd">;
 def QABS : SInst<"vqabs", "dd", "csiQcQsQiQl">;
-def NEG : SOpInst<"vneg", "dd", "csifQcQsQiQfQdQl", OP_NEG>;
+def NEG : SOpInst<"vneg", "dd", "csifdQcQsQiQfQdQl", OP_NEG>;
 def QNEG : SInst<"vqneg", "dd", "csiQcQsQiQl">;
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -742,13 +744,13 @@
 ////////////////////////////////////////////////////////////////////////////////
 // Max/Min Integer
-// With additional Qd type.
-def MAX : SInst<"vmax", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
-def MIN : SInst<"vmin", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
+// With additional d, Qd type.
+def MAX : SInst<"vmax", "ddd", "csiUcUsUifdQcQsQiQUcQUsQUiQfQd">;
+def MIN : SInst<"vmin", "ddd", "csiUcUsUifdQcQsQiQUcQUsQUiQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // MaxNum/MinNum Floating Point
-def FMAXNM : SInst<"vmaxnm", "ddd", "fQfQd">;
-def FMINNM : SInst<"vminnm", "ddd", "fQfQd">;
+def FMAXNM : SInst<"vmaxnm", "ddd", "fdQfQd">;
+def FMINNM : SInst<"vminnm", "ddd", "fdQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Pairwise Max/Min
Index: tools/clang/test/CodeGen/aarch64-neon-intrinsics.c
===================================================================
--- tools/clang/test/CodeGen/aarch64-neon-intrinsics.c
+++ tools/clang/test/CodeGen/aarch64-neon-intrinsics.c
@@ -11218,3 +11218,93 @@
   // CHECK: fabd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
   return vabdd_f64(a, b);
 }
+
+float64x1_t test_vadd_f64(float64x1_t a, float64x1_t b) {
+  // CHECK-LABEL: test_vadd_f64
+  return vadd_f64(a, b);
+  // CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vmul_f64(float64x1_t a, float64x1_t b) {
+  // CHECK-LABEL: test_vmul_f64
+  return vmul_f64(a, b);
+  // CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vdiv_f64(float64x1_t a, float64x1_t b) {
+  // CHECK-LABEL: test_vdiv_f64
+  return vdiv_f64(a, b);
+  // CHECK: fdiv d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vmla_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
+  // CHECK-LABEL: test_vmla_f64
+  return vmla_f64(a, b, c);
+  // CHECK: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
+  // CHECK-LABEL: test_vmls_f64
+  return vmls_f64(a, b, c);
+  // CHECK: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
+  // CHECK-LABEL: test_vfma_f64
+  return vfma_f64(a, b, c);
+  // CHECK: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
+  // CHECK-LABEL: test_vfms_f64
+  return vfms_f64(a, b, c);
+  // CHECK: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) {
+  // CHECK-LABEL: test_vsub_f64
+  return vsub_f64(a, b);
+  // CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vabd_f64(float64x1_t a, float64x1_t b) {
+  // CHECK-LABEL: test_vabd_f64
+  return vabd_f64(a, b);
+  // CHECK: fabd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) {
+  // CHECK-LABEL: test_vmax_f64
+  return vmax_f64(a, b);
+  // CHECK: fmax d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vmin_f64(float64x1_t a, float64x1_t b) {
+  // CHECK-LABEL: test_vmin_f64
+  return vmin_f64(a, b);
+  // CHECK: fmin d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vmaxnm_f64(float64x1_t a, float64x1_t b) {
+  // CHECK-LABEL: test_vmaxnm_f64
+  return vmaxnm_f64(a, b);
+  // CHECK: fmaxnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vminnm_f64(float64x1_t a, float64x1_t b) {
+  // CHECK-LABEL: test_vminnm_f64
+  return vminnm_f64(a, b);
+  // CHECK: fminnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vabs_f64(float64x1_t a) {
+  // CHECK-LABEL: test_vabs_f64
+  return vabs_f64(a);
+  // CHECK: fabs d{{[0-9]+}}, d{{[0-9]+}}
+}
+
+float64x1_t test_vneg_f64(float64x1_t a) {
+  // CHECK-LABEL: test_vneg_f64
+  return vneg_f64(a);
+  // CHECK: fneg d{{[0-9]+}}, d{{[0-9]+}}
+}