Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -702,9 +702,14 @@
   defm FTMAD_ZZI : sve_fp_ftmad<"ftmad", int_aarch64_sve_ftmad_x>;
 } // End HasSVE
 
+class TriSVEOpFrag<dag res> : PatFrag<(ops node:$pg, node:$zn, node:$zm, node:$za), res>;
+
 let Predicates = [HasSVEorSME] in {
   defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b00, "fmla", int_aarch64_sve_fmla_lane>;
   defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b01, "fmls", int_aarch64_sve_fmls_lane>;
+  defm : SVEFPIndexedTiedPatterns<"FMLA_ZZZI", TriSVEOpFrag<(AArch64fma_p node:$pg, node:$zn, node:$zm, node:$za)>>;
+  defm : SVEFPIndexedTiedPatterns<"FMLS_ZZZI", TriSVEOpFrag<(AArch64fma_p node:$pg, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$zm, node:$za)>>;
+  defm : SVEFPIndexedTiedPatterns<"FMLS_ZZZI", TriSVEOpFrag<(AArch64fma_p node:$pg, node:$zm, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$za)>>;
 
   defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla", int_aarch64_sve_fcmla_lane>;
   defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>;
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2393,6 +2393,18 @@
             (!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b_timm:$idx)>;
 }
 
+class SVEFPIndexedTiedPattern<ValueType vtd, ValueType vtp, ValueType scalartp,
+                              SDPatternOperator OpNode, Instruction inst, ZPRRegOp zprty, Operand imm_ty>
+  : Pat<(vtd (OpNode (vtp (SVEAnyPredicate)),
+             (vtd (splat_vector (scalartp (vector_extract (vtd zprty:$Zda), imm_ty:$idx)))),
+             (vtd zprty:$Zn), (vtd zprty:$Zm))),
+        (inst zprty:$Zda, zprty:$Zn, zprty:$Zm, (!cast<SDNodeXForm>("trunc_imm") $idx))>;
+
+multiclass SVEFPIndexedTiedPatterns<string INST, SDPatternOperator OpNode> {
+  def : SVEFPIndexedTiedPattern<nxv8f16, nxv8i1, f16, OpNode, !cast<Instruction>(INST # "_H"), ZPR16, VectorIndexH>;
+  def : SVEFPIndexedTiedPattern<nxv4f32, nxv4i1, f32, OpNode, !cast<Instruction>(INST # "_S"), ZPR32, VectorIndexS>;
+  def : SVEFPIndexedTiedPattern<nxv2f64, nxv2i1, f64, OpNode, !cast<Instruction>(INST # "_D"), ZPR64, VectorIndexD>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Floating Point Multiply - Indexed Group
 //===----------------------------------------------------------------------===//
Index: llvm/test/CodeGen/AArch64/sve-fma.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-fma.ll
@@ -0,0 +1,148 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 8 x half> @sve_fma_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
+; CHECK-LABEL: sve_fma_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmla z1.h, z0.h, z2.h[0]
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %b0splat = shufflevector <vscale x 8 x half> %b, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %mad = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> %b0splat, <vscale x 8 x half> %a, <vscale x 8 x half> %c)
+  ret <vscale x 8 x half> %mad
+}
+
+define <vscale x 4 x float> @sve_fma_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
+; CHECK-LABEL: sve_fma_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmla z1.s, z0.s, z2.s[0]
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %b0splat = shufflevector <vscale x 4 x float> %b, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %mad = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> %b0splat, <vscale x 4 x float> %a, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %mad
+}
+
+define <vscale x 2 x double> @sve_fma_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
+; CHECK-LABEL: sve_fma_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmla z1.d, z0.d, z2.d[0]
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %b0splat = shufflevector <vscale x 2 x double> %b, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %mad = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> %b0splat, <vscale x 2 x double> %a, <vscale x 2 x double> %c)
+  ret <vscale x 2 x double> %mad
+}
+
+define <vscale x 8 x half> @sve_fmuladd_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
+; CHECK-LABEL: sve_fmuladd_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmla z1.h, z0.h, z2.h[0]
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %b0splat = shufflevector <vscale x 8 x half> %b, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %mad = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> %b0splat, <vscale x 8 x half> %a, <vscale x 8 x half> %c)
+  ret <vscale x 8 x half> %mad
+}
+
+define <vscale x 4 x float> @sve_fmuladd_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
+; CHECK-LABEL: sve_fmuladd_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmla z1.s, z0.s, z2.s[0]
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %b0splat = shufflevector <vscale x 4 x float> %b, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %mad = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> %b0splat, <vscale x 4 x float> %a, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %mad
+}
+
+define <vscale x 2 x double> @sve_fmuladd_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
+; CHECK-LABEL: sve_fmuladd_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmla z1.d, z0.d, z2.d[0]
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %b0splat = shufflevector <vscale x 2 x double> %b, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %mad = call <vscale x 2 x double> @llvm.fmuladd.nxv2f64(<vscale x 2 x double> %b0splat, <vscale x 2 x double> %a, <vscale x 2 x double> %c)
+  ret <vscale x 2 x double> %mad
+}
+
+define <vscale x 8 x half> @sve_fmls_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
+; CHECK-LABEL: sve_fmls_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmls z1.h, z0.h, z2.h[0]
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %b0splat = shufflevector <vscale x 8 x half> %b, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %b0splat_neg = fneg <vscale x 8 x half> %b0splat
+  %mad = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> %b0splat_neg, <vscale x 8 x half> %a, <vscale x 8 x half> %c)
+  ret <vscale x 8 x half> %mad
+}
+
+define <vscale x 4 x float> @sve_fmls_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
+; CHECK-LABEL: sve_fmls_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmls z1.s, z0.s, z2.s[0]
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %b0splat = shufflevector <vscale x 4 x float> %b, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %b0splat_neg = fneg <vscale x 4 x float> %b0splat
+  %mad = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> %b0splat_neg, <vscale x 4 x float> %a, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %mad
+}
+
+define <vscale x 2 x double> @sve_fmls_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
+; CHECK-LABEL: sve_fmls_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmls z1.d, z0.d, z2.d[0]
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %b0splat = shufflevector <vscale x 2 x double> %b, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %b0splat_neg = fneg <vscale x 2 x double> %b0splat
+  %mad = call <vscale x 2 x double> @llvm.fmuladd.nxv2f64(<vscale x 2 x double> %b0splat_neg, <vscale x 2 x double> %a, <vscale x 2 x double> %c)
+  ret <vscale x 2 x double> %mad
+}
+
+define <vscale x 8 x half> @sve_fmls_nxv8f16_1(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
+; CHECK-LABEL: sve_fmls_nxv8f16_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmls z1.h, z0.h, z2.h[0]
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %b0splat = shufflevector <vscale x 8 x half> %b, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %b0splat_neg = fneg <vscale x 8 x half> %b0splat
+  %mad = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b0splat_neg, <vscale x 8 x half> %c)
+  ret <vscale x 8 x half> %mad
+}
+
+define <vscale x 4 x float> @sve_fmls_nxv4f32_1(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
+; CHECK-LABEL: sve_fmls_nxv4f32_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmls z1.s, z0.s, z2.s[0]
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %b0splat = shufflevector <vscale x 4 x float> %b, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %b0splat_neg = fneg <vscale x 4 x float> %b0splat
+  %mad = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b0splat_neg, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %mad
+}
+
+define <vscale x 2 x double> @sve_fmls_nxv2f64_1(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
+; CHECK-LABEL: sve_fmls_nxv2f64_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmls z1.d, z0.d, z2.d[0]
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %b0splat = shufflevector <vscale x 2 x double> %b, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %b0splat_neg = fneg <vscale x 2 x double> %b0splat
+  %mad = call <vscale x 2 x double> @llvm.fmuladd.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b0splat_neg, <vscale x 2 x double> %c)
+  ret <vscale x 2 x double> %mad
+}
+
+declare <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.fmuladd.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)