Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -397,19 +397,14 @@
                            [(int_aarch64_sve_eor3 node:$op1, node:$op2, node:$op3),
                             (xor node:$op1, (xor node:$op2, node:$op3))]>;
 
-class fma_patfrags<SDPatternOperator intrinsic, SDPatternOperator sdnode>
-    : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
-               [(intrinsic node:$pred, node:$op1, node:$op2, node:$op3),
-                (sdnode (SVEAllActive), node:$op1, (vselect node:$pred, (AArch64fmul_p_oneuse (SVEAllActive), node:$op2, node:$op3), (SVEDup0)))],
-               [{
-  if ((N->getOpcode() != AArch64ISD::FADD_PRED) &&
-      (N->getOpcode() != AArch64ISD::FSUB_PRED))
-    return true;  // it's the intrinsic
+class fma_patfrag<SDPatternOperator sdnode>
+    : PatFrag<(ops node:$pred, node:$op1, node:$op2, node:$op3),
+              (sdnode (SVEAllActive), node:$op1, (vselect node:$pred, (AArch64fmul_p_oneuse (SVEAllActive), node:$op2, node:$op3), (SVEDup0))), [{
   return N->getFlags().hasAllowContract();
 }]>;
 
-def AArch64fmla_m1 : fma_patfrags<int_aarch64_sve_fmla, AArch64fadd_p>;
-def AArch64fmls_m1 : fma_patfrags<int_aarch64_sve_fmls, AArch64fsub_p>;
+def AArch64fmla_m1 : fma_patfrag<AArch64fadd_p>;
+def AArch64fmls_m1 : fma_patfrag<AArch64fsub_p>;
 
 def AArch64smax_m1 : EitherVSelectOrPassthruPatFrags<int_aarch64_sve_smax, smax>;
 def AArch64umax_m1 : EitherVSelectOrPassthruPatFrags<int_aarch64_sve_umax, umax>;
@@ -654,18 +649,18 @@
   defm FMLA_ZPmZZ  : sve_fp_3op_p_zds_a<0b00, "fmla",  "FMLA_ZPZZZ",  AArch64fmla_m1, "FMAD_ZPmZZ">;
   defm FMLS_ZPmZZ  : sve_fp_3op_p_zds_a<0b01, "fmls",  "FMLS_ZPZZZ",  AArch64fmls_m1, "FMSB_ZPmZZ">;
-  defm FNMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b10, "fnmla", "FNMLA_ZPZZZ", int_aarch64_sve_fnmla, "FNMAD_ZPmZZ">;
-  defm FNMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b11, "fnmls", "FNMLS_ZPZZZ", int_aarch64_sve_fnmls, "FNMSB_ZPmZZ">;
+  defm FNMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b10, "fnmla", "FNMLA_ZPZZZ", null_frag, "FNMAD_ZPmZZ">;
+  defm FNMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b11, "fnmls", "FNMLS_ZPZZZ", null_frag, "FNMSB_ZPmZZ">;
   defm FMAD_ZPmZZ  : sve_fp_3op_p_zds_b<0b00, "fmad",  int_aarch64_sve_fmad,  "FMLA_ZPmZZ", /*isReverseInstr*/ 1>;
   defm FMSB_ZPmZZ  : sve_fp_3op_p_zds_b<0b01, "fmsb",  int_aarch64_sve_fmsb,  "FMLS_ZPmZZ", /*isReverseInstr*/ 1>;
   defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad", int_aarch64_sve_fnmad, "FNMLA_ZPmZZ", /*isReverseInstr*/ 1>;
   defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb", int_aarch64_sve_fnmsb, "FNMLS_ZPmZZ", /*isReverseInstr*/ 1>;
 
-  defm FMLA_ZPZZZ  : sve_fp_3op_p_zds_zx;
-  defm FMLS_ZPZZZ  : sve_fp_3op_p_zds_zx;
-  defm FNMLA_ZPZZZ : sve_fp_3op_p_zds_zx;
-  defm FNMLS_ZPZZZ : sve_fp_3op_p_zds_zx;
+  defm FMLA_ZPZZZ  : sve_fp_3op_p_zds_zx<int_aarch64_sve_fmla>;
+  defm FMLS_ZPZZZ  : sve_fp_3op_p_zds_zx<int_aarch64_sve_fmls>;
+  defm FNMLA_ZPZZZ : sve_fp_3op_p_zds_zx<int_aarch64_sve_fnmla>;
+  defm FNMLS_ZPZZZ : sve_fp_3op_p_zds_zx<int_aarch64_sve_fnmls>;
 
   multiclass fma<ValueType Ty, ValueType PredTy, string Suffix> {
     // Zd = Za + Zn * Zm
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2177,10 +2177,14 @@
   def : SVE_4_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
 }
 
-multiclass sve_fp_3op_p_zds_zx {
+multiclass sve_fp_3op_p_zds_zx<SDPatternOperator op> {
   def _UNDEF_H : PredThreeOpPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
   def _UNDEF_S : PredThreeOpPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
   def _UNDEF_D : PredThreeOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
+
+  def : SVE_4_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _UNDEF_H)>;
+  def : SVE_4_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _UNDEF_S)>;
+  def : SVE_4_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _UNDEF_D)>;
 }
 
 //===----------------------------------------------------------------------===//
Index: llvm/test/CodeGen/AArch64/sve-intrinsic-fmla-fmls-fnmla-fnmls.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsic-fmla-fmls-fnmla-fnmls.ll
@@ -0,0 +1,178 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+@g_val = external local_unnamed_addr constant [8 x double], align 8
+
+define <vscale x 2 x double> @test_fmla_fmad(<vscale x 16 x i1> %pg, <vscale x 2 x double> %r) {
+; CHECK-LABEL: test_fmla_fmad:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, :got:g_val
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    fmul z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT:    ldr x8, [x8, :got_lo12:g_val]
+; CHECK-NEXT:    ld1rd { z2.d }, p1/z, [x8]
+; CHECK-NEXT:    movprfx z3, z2
+; CHECK-NEXT:    fmla z3.d, p0/m, z2.d, z1.d
+; CHECK-NEXT:    fmad z3.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    fmad z3.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    fmad z3.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    fmad z3.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    fmad z3.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    fmla z2.d, p0/m, z3.d, z1.d
+; CHECK-NEXT:    fmov z3.d, #-1.00000000
+; CHECK-NEXT:    fmul z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT:    fmul z0.d, p0/m, z0.d, z3.d
+; CHECK-NEXT:    fmla z0.d, p0/m, z2.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  %1 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %r, <vscale x 2 x double> %r)
+  %2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @g_val, i64 0, i64 0), align 8
+  %.splatinsert = insertelement <vscale x 2 x double> poison, double %2, i64 0
+  %3 = shufflevector <vscale x 2 x double> %.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+  %4 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %3, <vscale x 2 x double> %1)
+  %5 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %4, <vscale x 2 x double> %1)
+  %6 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %5, <vscale x 2 x double> %1)
+  %7 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %6, <vscale x 2 x double> %1)
+  %8 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %7, <vscale x 2 x double> %1)
+  %9 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %8, <vscale x 2 x double> %1)
+  %10 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %r)
+  %11 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %9, <vscale x 2 x double> %1)
+  %12 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %r, <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -1.000000e+00, i32 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer))
+  %13 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %12, <vscale x 2 x double> %11, <vscale x 2 x double> %10)
+  ret <vscale x 2 x double> %13
+}
+
+define <vscale x 2 x double> @test_fmls_fmsb(<vscale x 16 x i1> %pg, <vscale x 2 x double> %r) {
+; CHECK-LABEL: test_fmls_fmsb:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, :got:g_val
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    fmul z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT:    ldr x8, [x8, :got_lo12:g_val]
+; CHECK-NEXT:    ld1rd { z2.d }, p1/z, [x8]
+; CHECK-NEXT:    movprfx z3, z2
+; CHECK-NEXT:    fmls z3.d, p0/m, z2.d, z1.d
+; CHECK-NEXT:    fmsb z3.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    fmsb z3.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    fmsb z3.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    fmsb z3.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    fmsb z3.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    fmls z2.d, p0/m, z3.d, z1.d
+; CHECK-NEXT:    fmov z3.d, #-1.00000000
+; CHECK-NEXT:    fmul z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT:    fmul z0.d, p0/m, z0.d, z3.d
+; CHECK-NEXT:    fmls z0.d, p0/m, z2.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  %1 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %r, <vscale x 2 x double> %r)
+  %2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @g_val, i64 0, i64 0), align 8
+  %.splatinsert = insertelement <vscale x 2 x double> poison, double %2, i64 0
+  %3 = shufflevector <vscale x 2 x double> %.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+  %4 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %3, <vscale x 2 x double> %1)
+  %5 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %4, <vscale x 2 x double> %1)
+  %6 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %5, <vscale x 2 x double> %1)
+  %7 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %6, <vscale x 2 x double> %1)
+  %8 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %7, <vscale x 2 x double> %1)
+  %9 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %8, <vscale x 2 x double> %1)
+  %10 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %r)
+  %11 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %9, <vscale x 2 x double> %1)
+  %12 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %r, <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -1.000000e+00, i32 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer))
+  %13 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %12, <vscale x 2 x double> %11, <vscale x 2 x double> %10)
+  ret <vscale x 2 x double> %13
+}
+
+define <vscale x 2 x double> @test_fnmla_fnmad(<vscale x 16 x i1> %pg, <vscale x 2 x double> %r) {
+; CHECK-LABEL: test_fnmla_fnmad:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, :got:g_val
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    fmul z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT:    ldr x8, [x8, :got_lo12:g_val]
+; CHECK-NEXT:    ld1rd { z2.d }, p1/z, [x8]
+; CHECK-NEXT:    movprfx z3, z2
+; CHECK-NEXT:    fnmla z3.d, p0/m, z2.d, z1.d
+; CHECK-NEXT:    fnmad z3.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    fnmad z3.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    fnmad z3.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    fnmad z3.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    fnmad z3.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    fnmla z2.d, p0/m, z3.d, z1.d
+; CHECK-NEXT:    fmov z3.d, #-1.00000000
+; CHECK-NEXT:    fmul z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT:    fmul z0.d, p0/m, z0.d, z3.d
+; CHECK-NEXT:    fnmla z0.d, p0/m, z2.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  %1 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %r, <vscale x 2 x double> %r)
+  %2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @g_val, i64 0, i64 0), align 8
+  %.splatinsert = insertelement <vscale x 2 x double> poison, double %2, i64 0
+  %3 = shufflevector <vscale x 2 x double> %.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+  %4 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %3, <vscale x 2 x double> %1)
+  %5 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %4, <vscale x 2 x double> %1)
+  %6 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %5, <vscale x 2 x double> %1)
+  %7 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %6, <vscale x 2 x double> %1)
+  %8 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %7, <vscale x 2 x double> %1)
+  %9 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %8, <vscale x 2 x double> %1)
+  %10 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %r)
+  %11 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %9, <vscale x 2 x double> %1)
+  %12 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %r, <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -1.000000e+00, i32 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer))
+  %13 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %12, <vscale x 2 x double> %11, <vscale x 2 x double> %10)
+  ret <vscale x 2 x double> %13
+}
+
+define <vscale x 2 x double> @test_fnmls_fnmsb(<vscale x 16 x i1> %pg, <vscale x 2 x double> %r) {
+; CHECK-LABEL: test_fnmls_fnmsb:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, :got:g_val
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    fmul z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT:    ldr x8, [x8, :got_lo12:g_val]
+; CHECK-NEXT:    ld1rd { z2.d }, p1/z, [x8]
+; CHECK-NEXT:    movprfx z3, z2
+; CHECK-NEXT:    fnmls z3.d, p0/m, z2.d, z1.d
+; CHECK-NEXT:    fnmsb z3.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    fnmsb z3.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    fnmsb z3.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    fnmsb z3.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    fnmsb z3.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    fnmls z2.d, p0/m, z3.d, z1.d
+; CHECK-NEXT:    fmov z3.d, #-1.00000000
+; CHECK-NEXT:    fmul z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT:    fmul z0.d, p0/m, z0.d, z3.d
+; CHECK-NEXT:    fnmls z0.d, p0/m, z2.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  %1 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %r, <vscale x 2 x double> %r)
+  %2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @g_val, i64 0, i64 0), align 8
+  %.splatinsert = insertelement <vscale x 2 x double> poison, double %2, i64 0
+  %3 = shufflevector <vscale x 2 x double> %.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+  %4 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %3, <vscale x 2 x double> %1)
+  %5 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %4, <vscale x 2 x double> %1)
+  %6 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %5, <vscale x 2 x double> %1)
+  %7 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %6, <vscale x 2 x double> %1)
+  %8 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %7, <vscale x 2 x double> %1)
+  %9 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %8, <vscale x 2 x double> %1)
+  %10 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %r)
+  %11 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %3, <vscale x 2 x double> %9, <vscale x 2 x double> %1)
+  %12 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %r, <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -1.000000e+00, i32 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer))
+  %13 = tail call contract <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %12, <vscale x 2 x double> %11, <vscale x 2 x double> %10)
+  ret <vscale x 2 x double> %13
+}
+
+declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+declare i64 @llvm.vscale.i64()