Index: llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -400,7 +400,7 @@
   assert(DstReg != MI.getOperand(3).getReg());
 
   bool UseRev = false;
-  unsigned PredIdx, DOPIdx, SrcIdx;
+  unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
   switch (DType) {
   case AArch64::DestructiveBinaryComm:
   case AArch64::DestructiveBinaryCommWithRev:
@@ -414,6 +414,18 @@
   case AArch64::DestructiveBinary:
     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
     break;
+  case AArch64::DestructiveTernaryCommWithRev:
+    std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
+    if (DstReg == MI.getOperand(3).getReg()) {
+      // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
+      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
+      UseRev = true;
+    } else if (DstReg == MI.getOperand(4).getReg()) {
+      // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
+      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
+      UseRev = true;
+    }
+    break;
   default:
     llvm_unreachable("Unsupported Destructive Operand type");
   }
@@ -430,6 +442,12 @@
         DstReg != MI.getOperand(DOPIdx).getReg() ||
         MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
     break;
+  case AArch64::DestructiveTernaryCommWithRev:
+    DOPRegIsUnique =
+        DstReg != MI.getOperand(DOPIdx).getReg() ||
+        (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
+         MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
+    break;
   }
 
   assert (DOPRegIsUnique && "The destructive operand should be unique");
@@ -504,6 +522,12 @@
         .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
         .add(MI.getOperand(SrcIdx));
     break;
+  case AArch64::DestructiveTernaryCommWithRev:
+    DOP.add(MI.getOperand(PredIdx))
+        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
+        .add(MI.getOperand(SrcIdx))
+        .add(MI.getOperand(Src2Idx));
+    break;
   }
 
   if (PRFX) {
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -301,15 +301,20 @@
   defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd", int_aarch64_sve_fcadd>;
   defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla", int_aarch64_sve_fcmla>;
 
-  defm FMLA_ZPmZZ  : sve_fp_3op_p_zds_a<0b00, "fmla",  int_aarch64_sve_fmla>;
-  defm FMLS_ZPmZZ  : sve_fp_3op_p_zds_a<0b01, "fmls",  int_aarch64_sve_fmls>;
-  defm FNMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b10, "fnmla", int_aarch64_sve_fnmla>;
-  defm FNMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b11, "fnmls", int_aarch64_sve_fnmls>;
-
-  defm FMAD_ZPmZZ  : sve_fp_3op_p_zds_b<0b00, "fmad",  int_aarch64_sve_fmad>;
-  defm FMSB_ZPmZZ  : sve_fp_3op_p_zds_b<0b01, "fmsb",  int_aarch64_sve_fmsb>;
-  defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad", int_aarch64_sve_fnmad>;
-  defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb", int_aarch64_sve_fnmsb>;
+  defm FMLA_ZPmZZ  : sve_fp_3op_p_zds_a<0b00, "fmla",  "FMLA_ZPZZZ",  int_aarch64_sve_fmla,  "FMAD_ZPmZZ",  1>;
+  defm FMLS_ZPmZZ  : sve_fp_3op_p_zds_a<0b01, "fmls",  "FMLS_ZPZZZ",  int_aarch64_sve_fmls,  "FMSB_ZPmZZ",  1>;
+  defm FNMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b10, "fnmla", "FNMLA_ZPZZZ", int_aarch64_sve_fnmla, "FNMAD_ZPmZZ", 1>;
+  defm FNMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b11, "fnmls", "FNMLS_ZPZZZ", int_aarch64_sve_fnmls, "FNMSB_ZPmZZ", 1>;
+
+  defm FMAD_ZPmZZ  : sve_fp_3op_p_zds_b<0b00, "fmad",  int_aarch64_sve_fmad,  "FMLA_ZPmZZ",  0>;
+  defm FMSB_ZPmZZ  : sve_fp_3op_p_zds_b<0b01, "fmsb",  int_aarch64_sve_fmsb,  "FMLS_ZPmZZ",  0>;
+  defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad", int_aarch64_sve_fnmad, "FNMLA_ZPmZZ", 0>;
+  defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb", int_aarch64_sve_fnmsb, "FNMLS_ZPmZZ", 0>;
+
+  defm FMLA_ZPZZZ  : sve_fp_3op_p_zds_zx;
+  defm FMLS_ZPZZZ  : sve_fp_3op_p_zds_zx;
+  defm FNMLA_ZPZZZ : sve_fp_3op_p_zds_zx;
+  defm FNMLS_ZPZZZ : sve_fp_3op_p_zds_zx;
 
   defm FTMAD_ZZI : sve_fp_ftmad<"ftmad", int_aarch64_sve_ftmad_x>;
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -375,6 +375,12 @@
                    ValueType vt2, ValueType vt3, Instruction inst>
 : Pat<(vtd (vtd (op vt1:$Op1, (vselect vt1:$Op1, vt2:$Op2, (SVEDup0)), vt3:$Op3))),
       (inst $Op1, $Op2, $Op3)>;
+
+class SVE_4_Op_Pat_SelZero
+: Pat<(vtd (op vt1:$Op1, (vselect vt1:$Op1, vt2:$Op2, (SVEDup0)), vt3:$Op3, vt4:$Op4)),
+      (inst $Op1, $Op2, $Op3, $Op4)>;
 }
 
 //
@@ -433,6 +439,13 @@
     Pseudo<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zs1, zprty:$Zs2), []> {
     let FalseLanes = flags;
   }
+
+  class PredThreeOpPseudo
+  : SVEPseudo2Instr,
+    Pseudo<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zs1, zprty:$Zs2, zprty:$Zs3), []> {
+    let FalseLanes = flags;
+  }
 }
 
 //===----------------------------------------------------------------------===//
@@ -1668,14 +1681,20 @@
   let Inst{4-0}   = Zda;
 
   let Constraints = "$Zda = $_Zda";
-  let DestructiveInstType = DestructiveOther;
   let ElementSize = zprty.ElementSize;
 }
 
-multiclass sve_fp_3op_p_zds_a opc, string asm, SDPatternOperator op> {
-  def _H : sve_fp_3op_p_zds_a<0b01, opc, asm, ZPR16>;
-  def _S : sve_fp_3op_p_zds_a<0b10, opc, asm, ZPR32>;
-  def _D : sve_fp_3op_p_zds_a<0b11, opc, asm, ZPR64>;
+multiclass sve_fp_3op_p_zds_a opc, string asm, string Ps,
+                              SDPatternOperator op, string revname="",
+                              bit isOrig=0> {
+  let DestructiveInstType = DestructiveTernaryCommWithRev in {
+  def _H : sve_fp_3op_p_zds_a<0b01, opc, asm, ZPR16>,
+           SVEPseudo2Instr, SVEInstr2Rev;
+  def _S : sve_fp_3op_p_zds_a<0b10, opc, asm, ZPR32>,
+           SVEPseudo2Instr, SVEInstr2Rev;
+  def _D : sve_fp_3op_p_zds_a<0b11, opc, asm, ZPR64>,
+           SVEPseudo2Instr, SVEInstr2Rev;
+  }
 
   def : SVE_4_Op_Pat(NAME # _H)>;
   def : SVE_4_Op_Pat(NAME # _S)>;
@@ -1707,16 +1726,38 @@
   let ElementSize = zprty.ElementSize;
 }
 
-multiclass sve_fp_3op_p_zds_b opc, string asm, SDPatternOperator op> {
-  def _H : sve_fp_3op_p_zds_b<0b01, opc, asm, ZPR16>;
-  def _S : sve_fp_3op_p_zds_b<0b10, opc, asm, ZPR32>;
-  def _D : sve_fp_3op_p_zds_b<0b11, opc, asm, ZPR64>;
+multiclass sve_fp_3op_p_zds_b opc, string asm, SDPatternOperator op,
+                              string revname, bit isOrig> {
+  def _H : sve_fp_3op_p_zds_b<0b01, opc, asm, ZPR16>,
+           SVEInstr2Rev;
+  def _S : sve_fp_3op_p_zds_b<0b10, opc, asm, ZPR32>,
+           SVEInstr2Rev;
+  def _D : sve_fp_3op_p_zds_b<0b11, opc, asm, ZPR64>,
+           SVEInstr2Rev;
 
   def : SVE_4_Op_Pat(NAME # _H)>;
   def : SVE_4_Op_Pat(NAME # _S)>;
   def : SVE_4_Op_Pat(NAME # _D)>;
 }
 
+multiclass sve_fp_3op_p_zds_zx {
+  def _ZERO_H : PredThreeOpPseudo;
+  def _ZERO_S : PredThreeOpPseudo;
+  def _ZERO_D : PredThreeOpPseudo;
+
+  def : SVE_4_Op_Pat_SelZero(NAME # _ZERO_H)>;
+  def : SVE_4_Op_Pat_SelZero(NAME # _ZERO_S)>;
+  def : SVE_4_Op_Pat_SelZero(NAME # _ZERO_D)>;
+
+  // As above but with the accumulator in its alternative position.
+ def : Pat<(nxv8f16 (rev_op nxv8i1:$Op1, (vselect nxv8i1:$Op1, nxv8f16:$Op2, (SVEDup0)), nxv8f16:$Op3, nxv8f16:$Op4)), + (!cast(NAME # _ZERO_H) $Op1, $Op4, $Op2, $Op3)>; + def : Pat<(nxv4f32 (rev_op nxv4i1:$Op1, (vselect nxv4i1:$Op1, nxv4f32:$Op2, (SVEDup0)), nxv4f32:$Op3, nxv4f32:$Op4)), + (!cast(NAME # _ZERO_S) $Op1, $Op4, $Op2, $Op3)>; + def : Pat<(nxv2f64 (rev_op nxv2i1:$Op1, (vselect nxv2i1:$Op1, nxv2f64:$Op2, (SVEDup0)), nxv2f64:$Op3, nxv2f64:$Op4)), + (!cast(NAME # _ZERO_D) $Op1, $Op4, $Op2, $Op3)>; +} + //===----------------------------------------------------------------------===// // SVE Floating Point Multiply-Add - Indexed Group //===----------------------------------------------------------------------===// Index: llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-merging.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-merging.ll +++ llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-merging.ll @@ -28,6 +28,49 @@ ret %out } +; +; FMAD +; + +define @fmad_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fmad_h: +; CHECK: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmad.nxv8f16( %pg, + %a_z, + %b, + %c) + ret %out +} + +define @fmad_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fmad_s: +; CHECK: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmad.nxv4f32( %pg, + %a_z, + %b, + %c) + ret %out +} + +define @fmad_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fmad_d: +; CHECK: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmad.nxv2f64( %pg, + %a_z, + %b, + %c) + ret %out +} + ; ; FMAX ; @@ -140,6 +183,135 @@ ret %out } +; +; FMLA +; + +define @fmla_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fmla_h: +; CHECK: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fmla z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmla.nxv8f16( %pg, + %a_z, + %b, + %c) + ret %out +} + +define @fmla_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fmla_s: +; CHECK: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fmla z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmla.nxv4f32( %pg, + %a_z, + %b, + %c) + ret %out +} + +define @fmla_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fmla_d: +; CHECK: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fmla z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmla.nxv2f64( %pg, + %a_z, + %b, + %c) + ret %out +} + +; +; FMLS +; + +define @fmls_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fmls_h: +; CHECK: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fmls z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmls.nxv8f16( %pg, + %a_z, + %b, + %c) + ret %out +} + +define @fmls_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fmls_s: +; CHECK: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fmls z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmls.nxv4f32( %pg, + %a_z, + %b, + %c) + ret %out +} + +define @fmls_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fmls_d: +; CHECK: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fmls z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: 
ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmls.nxv2f64( %pg, + %a_z, + %b, + %c) + ret %out +} + +; +; FMSB +; + +define @fmsb_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fmsb_h: +; CHECK: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fmsb z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmsb.nxv8f16( %pg, + %a_z, + %b, + %c) + ret %out +} + +define @fmsb_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fmsb_s: +; CHECK: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fmsb z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmsb.nxv4f32( %pg, + %a_z, + %b, + %c) + ret %out +} + +define @fmsb_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fmsb_d: +; CHECK: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fmsb z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmsb.nxv2f64( %pg, + %a_z, + %b, + %c) + ret %out +} + ; ; FMUL ; @@ -168,6 +340,178 @@ ret %out } +; +; FNMAD +; + +define @fnmad_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmad_h: +; CHECK: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fnmad z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fnmad.nxv8f16( %pg, + %a_z, + %b, + %c) + ret %out +} + +define @fnmad_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmad_s: +; CHECK: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fnmad z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fnmad.nxv4f32( %pg, + %a_z, + %b, + %c) + ret %out +} + +define @fnmad_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmad_d: +; CHECK: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fnmad z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fnmad.nxv2f64( %pg, + %a_z, + %b, + %c) + ret %out +} + +; +; FNMLA +; + +define @fnmla_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmla_h: +; CHECK: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fnmla z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fnmla.nxv8f16( %pg, + %a_z, + %b, + %c) + ret %out +} + +define @fnmla_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmla_s: +; CHECK: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fnmla z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fnmla.nxv4f32( %pg, + %a_z, + %b, + %c) + ret %out +} + +define @fnmla_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmla_d: +; CHECK: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fnmla z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fnmla.nxv2f64( %pg, + %a_z, + %b, + %c) + ret %out +} + +; +; FNMLS +; + +define @fnmls_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmls_h: +; CHECK: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fnmls z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fnmls.nxv8f16( %pg, + %a_z, + %b, + %c) + ret %out +} + +define @fnmls_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmls_s: +; CHECK: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fnmls z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fnmls.nxv4f32( %pg, + %a_z, + %b, + %c) + ret %out +} + +define @fnmls_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmls_d: +; CHECK: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fnmls z0.d, p0/m, z1.d, z2.d +; 
CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fnmls.nxv2f64( %pg, + %a_z, + %b, + %c) + ret %out +} + +; +; FNMSB +; + +define @fnmsb_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmsb_h: +; CHECK: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fnmsb z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fnmsb.nxv8f16( %pg, + %a_z, + %b, + %c) + ret %out +} + +define @fnmsb_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmsb_s: +; CHECK: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fnmsb z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fnmsb.nxv4f32( %pg, + %a_z, + %b, + %c) + ret %out +} + +define @fnmsb_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmsb_d: +; CHECK: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fnmsb z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fnmsb.nxv2f64( %pg, + %a_z, + %b, + %c) + ret %out +} + ; ; FSUB ; @@ -236,6 +580,10 @@ declare @llvm.aarch64.sve.fdivr.nxv4f32(, , ) declare @llvm.aarch64.sve.fdivr.nxv2f64(, , ) +declare @llvm.aarch64.sve.fmad.nxv8f16(, , , ) +declare @llvm.aarch64.sve.fmad.nxv4f32(, , , ) +declare @llvm.aarch64.sve.fmad.nxv2f64(, , , ) + declare @llvm.aarch64.sve.fmax.nxv4f32(, , ) declare @llvm.aarch64.sve.fmax.nxv2f64(, , ) @@ -245,6 +593,26 @@ declare @llvm.aarch64.sve.fmin.nxv4f32(, , ) declare @llvm.aarch64.sve.fmin.nxv2f64(, , ) +declare @llvm.aarch64.sve.fmla.nxv8f16(, , , ) +declare @llvm.aarch64.sve.fmla.nxv4f32(, , , ) +declare @llvm.aarch64.sve.fmla.nxv2f64(, , , ) + +declare @llvm.aarch64.sve.fmla.lane.nxv8f16(, , , i32) +declare @llvm.aarch64.sve.fmla.lane.nxv4f32(, , , i32) +declare @llvm.aarch64.sve.fmla.lane.nxv2f64(, , , i32) + +declare @llvm.aarch64.sve.fmls.nxv8f16(, , , ) +declare @llvm.aarch64.sve.fmls.nxv4f32(, , , ) +declare @llvm.aarch64.sve.fmls.nxv2f64(, , , ) + +declare @llvm.aarch64.sve.fmls.lane.nxv8f16(, , , i32) +declare @llvm.aarch64.sve.fmls.lane.nxv4f32(, , , i32) +declare @llvm.aarch64.sve.fmls.lane.nxv2f64(, , , i32) + +declare @llvm.aarch64.sve.fmsb.nxv8f16(, , , ) +declare @llvm.aarch64.sve.fmsb.nxv4f32(, , , ) +declare @llvm.aarch64.sve.fmsb.nxv2f64(, , , ) + declare @llvm.aarch64.sve.fminnm.nxv4f32(, , ) declare @llvm.aarch64.sve.fminnm.nxv2f64(, , ) @@ -254,6 +622,22 @@ declare @llvm.aarch64.sve.fmulx.nxv4f32(, , ) declare @llvm.aarch64.sve.fmulx.nxv2f64(, , ) +declare @llvm.aarch64.sve.fnmad.nxv8f16(, , , ) +declare @llvm.aarch64.sve.fnmad.nxv4f32(, , , ) +declare @llvm.aarch64.sve.fnmad.nxv2f64(, , , ) + +declare @llvm.aarch64.sve.fnmla.nxv8f16(, , , ) +declare @llvm.aarch64.sve.fnmla.nxv4f32(, , , ) +declare @llvm.aarch64.sve.fnmla.nxv2f64(, , , ) + +declare @llvm.aarch64.sve.fnmls.nxv8f16(, , , ) +declare @llvm.aarch64.sve.fnmls.nxv4f32(, , , ) +declare @llvm.aarch64.sve.fnmls.nxv2f64(, , , ) + +declare @llvm.aarch64.sve.fnmsb.nxv8f16(, , , ) +declare @llvm.aarch64.sve.fnmsb.nxv4f32(, , , ) +declare @llvm.aarch64.sve.fnmsb.nxv2f64(, , , ) + declare @llvm.aarch64.sve.fsub.nxv4f32(, , ) declare @llvm.aarch64.sve.fsub.nxv2f64(, , )
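
Note (not part of the patch): the comments in the AArch64ExpandPseudoInsts.cpp hunk above ("FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za") describe when the expansion switches to the reversed opcode. The standalone C++ sketch below restates that decision table for illustration only; the names OperandChoice and chooseOperands are invented for this note and are not LLVM API.

// Illustrative sketch: which operand a DestructiveTernaryCommWithRev pseudo
// ties to its destination, and whether the reversed (FMAD-style) opcode is
// used. Assumed operand layout: 0 = Zd, 1 = Pg, 2 = accumulator, 3/4 = sources.
#include <cassert>
#include <cstdio>

struct OperandChoice {
  unsigned PredIdx;  // predicate operand
  unsigned DOPIdx;   // operand that must end up in the destination register
  unsigned SrcIdx;   // first remaining source
  unsigned Src2Idx;  // second remaining source
  bool UseRev;       // true => emit the reversed (FMAD-style) opcode
};

// Tie the destination to whichever source register it already aliases, so a
// single MOVPRFX plus one destructive instruction suffices (no extra copy).
static OperandChoice chooseOperands(unsigned DstReg, unsigned Src1Reg,
                                    unsigned Src2Reg) {
  OperandChoice C = {1, 2, 3, 4, false}; // default: FMLA form, tie to accumulator
  if (DstReg == Src1Reg) {
    // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
    C = {1, 3, 4, 2, true};
  } else if (DstReg == Src2Reg) {
    // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
    C = {1, 4, 3, 2, true};
  }
  return C;
}

int main() {
  // Arbitrary register ids for the example: z0 = 0, z1 = 1, z2 = 2.
  OperandChoice A = chooseOperands(0, 1, 2); // dst does not alias a multiplicand
  assert(!A.UseRev && A.DOPIdx == 2);        // plain FMLA, accumulator is destructive
  OperandChoice B = chooseOperands(1, 1, 2); // dst aliases the first multiplicand
  assert(B.UseRev && B.DOPIdx == 3);         // switch to FMAD form
  std::printf("FMLA form rev=%d, FMAD form rev=%d\n", A.UseRev, B.UseRev);
  return 0;
}

This mirrors why the zeroing pseudos added above can always be expanded to a movprfx followed by one FMLA/FMLS/FNMLA/FNMLS (or their FMAD-family reversals), which is the code shape the new CHECK lines in sve-intrinsics-fp-arith-merging.ll verify.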