Index: llvm/include/llvm/IR/IntrinsicsAArch64.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -771,6 +771,21 @@
                  LLVMMatchType<0>],
                 [IntrNoMem]>;
 
+  class AdvSIMD_2VectorArgIndexed_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMMatchType<0>,
+                 LLVMMatchType<0>,
+                 llvm_i32_ty],
+                [IntrNoMem]>;
+
+  class AdvSIMD_3VectorArgIndexed_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMMatchType<0>,
+                 LLVMMatchType<0>,
+                 LLVMMatchType<0>,
+                 llvm_i32_ty],
+                [IntrNoMem]>;
+
   class AdvSIMD_SVE_CNT_Intrinsic
     : Intrinsic<[LLVMVectorOfBitcastsToInt<0>],
                 [LLVMVectorOfBitcastsToInt<0>,
@@ -783,6 +798,32 @@
                  [LLVMSubdivide2VectorType<0>],
                 [IntrNoMem]>;
 
+  class AdvSIMD_SVE_CADD_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                 LLVMMatchType<0>,
+                 LLVMMatchType<0>,
+                 llvm_i32_ty],
+                [IntrNoMem]>;
+
+  class AdvSIMD_SVE_CMLA_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                 LLVMMatchType<0>,
+                 LLVMMatchType<0>,
+                 LLVMMatchType<0>,
+                 llvm_i32_ty],
+                [IntrNoMem]>;
+
+  class AdvSIMD_SVE_CMLA_LANE_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMMatchType<0>,
+                 LLVMMatchType<0>,
+                 LLVMMatchType<0>,
+                 llvm_i32_ty,
+                 llvm_i32_ty],
+                [IntrNoMem]>;
+
   class AdvSIMD_SVE_PUNPKHI_Intrinsic
     : Intrinsic<[LLVMHalfElementsVectorType<0>],
                 [llvm_anyvector_ty],
@@ -926,18 +967,34 @@
 
 def int_aarch64_sve_fabd       : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fadd       : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fcadd      : AdvSIMD_SVE_CADD_Intrinsic;
+def int_aarch64_sve_fcmla      : AdvSIMD_SVE_CMLA_Intrinsic;
+def int_aarch64_sve_fcmla_lane : AdvSIMD_SVE_CMLA_LANE_Intrinsic;
 def int_aarch64_sve_fdiv       : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fdivr      : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmad       : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_fmax       : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fmaxnm     : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fmin       : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fminnm     : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmla       : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_fmla_lane  : AdvSIMD_3VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_fmls       : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_fmls_lane  : AdvSIMD_3VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_fmsb       : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_fmul       : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fmulx      : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmul_lane  : AdvSIMD_2VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_fnmad      : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_fnmla      : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_fnmls      : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_fnmsb      : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_fscale     : AdvSIMD_SVE_SCALE_Intrinsic;
 def int_aarch64_sve_fsub       : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fsubr      : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_ftmad_x    : AdvSIMD_2VectorArgIndexed_Intrinsic;
 def int_aarch64_sve_ftsmul_x   : AdvSIMD_SVE_TSMUL_Intrinsic;
+def int_aarch64_sve_ftssel_x   : AdvSIMD_SVE_TSMUL_Intrinsic;
 
 //
 // Floating-point comparisons
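Illustrative sketch, not part of the patch: the intrinsic classes above fix one operand convention for everything added here. Predicated operations take the governing predicate as their first argument, the data operands follow, and any lane index or rotation comes last as an i32 that must be an immediate. A minimal LLVM IR call to one of the new intrinsics, mirroring the tests further down (the function name is chosen for illustration only):

  define <vscale x 4 x float> @fcadd_example(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
    ; fcadd only accepts rotations of 90 or 270, given directly in degrees.
    %res = call <vscale x 4 x float> @llvm.aarch64.sve.fcadd.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 90)
    ret <vscale x 4 x float> %res
  }
  declare <vscale x 4 x float> @llvm.aarch64.sve.fcadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, i32)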
Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -758,6 +758,13 @@
   let ParserMatchClass = Imm0_7Operand;
 }
 
+// imm32_0_7 predicate - True if the 32-bit immediate is in the range [0,7]
+def imm32_0_7 : Operand<i32>, ImmLeaf<i32, [{
+  return ((uint32_t)Imm) < 8;
+}]> {
+  let ParserMatchClass = Imm0_7Operand;
+}
+
 // imm32_0_15 predicate - True if the 32-bit immediate is in the range [0,15]
 def imm32_0_15 : Operand<i32>, ImmLeaf<i32, [{
   return ((uint32_t)Imm) < 16;
@@ ... @@
-def complexrotateop : Operand<i32> {
+def complexrotateop : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 270; }],
+                                            SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant((N->getSExtValue() / 90), SDLoc(N), MVT::i32);
+}]>> {
   let ParserMatchClass = ComplexRotationOperand<90, 0, "Even">;
   let PrintMethod = "printComplexRotationOp<90, 0>";
 }
-def complexrotateopodd : Operand<i32> {
+def complexrotateopodd : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 270; }],
+                                               SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(((N->getSExtValue() - 90) / 180), SDLoc(N), MVT::i32);
+}]>> {
   let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd">;
   let PrintMethod = "printComplexRotationOp<180, 90>";
 }
-
 let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
 class BaseSIMDThreeSameVectorComplex<bit Q, bit U, bits<2> size, bits<3> opcode,
                                      RegisterOperand regtype, Operand rottype,
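Worked example, not part of the patch: the ImmLeaf predicates above accept the rotation in degrees and the attached SDNodeXForms convert it to the encoded instruction field at selection time. The fcmla-style operand uses imm / 90, so rotations 0, 90, 180, 270 encode as 0, 1, 2, 3; the fcadd-style operand uses (imm - 90) / 180, so 90 encodes as 0 and 270 encodes as 1. For instance, the #270 used by the fcadd test later in this patch is emitted as rotation field value (270 - 90) / 180 = 1.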
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -145,28 +145,28 @@
   defm FRECPS_ZZZ  : sve_fp_3op_u_zd<0b110, "frecps",  int_aarch64_sve_frecps_x>;
   defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", int_aarch64_sve_frsqrts_x>;
 
-  defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel">;
+  defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel", int_aarch64_sve_ftssel_x>;
 
-  defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd">;
-  defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla">;
+  defm FCADD_ZPmZ  : sve_fp_fcadd<"fcadd", int_aarch64_sve_fcadd>;
+  defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla", int_aarch64_sve_fcmla>;
 
-  defm FMLA_ZPmZZ  : sve_fp_3op_p_zds_a<0b00, "fmla">;
-  defm FMLS_ZPmZZ  : sve_fp_3op_p_zds_a<0b01, "fmls">;
-  defm FNMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b10, "fnmla">;
-  defm FNMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b11, "fnmls">;
+  defm FMLA_ZPmZZ  : sve_fp_3op_p_zds_a<0b00, "fmla",  int_aarch64_sve_fmla>;
+  defm FMLS_ZPmZZ  : sve_fp_3op_p_zds_a<0b01, "fmls",  int_aarch64_sve_fmls>;
+  defm FNMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b10, "fnmla", int_aarch64_sve_fnmla>;
+  defm FNMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b11, "fnmls", int_aarch64_sve_fnmls>;
 
-  defm FMAD_ZPmZZ  : sve_fp_3op_p_zds_b<0b00, "fmad">;
-  defm FMSB_ZPmZZ  : sve_fp_3op_p_zds_b<0b01, "fmsb">;
-  defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad">;
-  defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb">;
+  defm FMAD_ZPmZZ  : sve_fp_3op_p_zds_b<0b00, "fmad",  int_aarch64_sve_fmad>;
+  defm FMSB_ZPmZZ  : sve_fp_3op_p_zds_b<0b01, "fmsb",  int_aarch64_sve_fmsb>;
+  defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad", int_aarch64_sve_fnmad>;
+  defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb", int_aarch64_sve_fnmsb>;
 
-  defm FTMAD_ZZI : sve_fp_ftmad<"ftmad">;
+  defm FTMAD_ZZI : sve_fp_ftmad<"ftmad", int_aarch64_sve_ftmad_x>;
 
-  defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla">;
-  defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls">;
+  defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla", int_aarch64_sve_fmla_lane>;
+  defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls", int_aarch64_sve_fmls_lane>;
 
-  defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla">;
-  defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul">;
+  defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla", int_aarch64_sve_fcmla_lane>;
+  defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>;
 
   // SVE floating point reductions.
   defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda">;
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -299,7 +299,8 @@
       (inst $Op1, $Op2, $Op3)>;
 
 class SVE_4_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
-                   ValueType vt2, ValueType vt3, ValueType vt4, Instruction inst>
+                   ValueType vt2, ValueType vt3, ValueType vt4,
+                   Instruction inst>
 : Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, vt4:$Op4)),
       (inst $Op1, $Op2, $Op3, $Op4)>;
 
@@ -1225,7 +1226,7 @@
 }
 
 class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty>
-: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, imm0_7:$imm3),
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, imm32_0_7:$imm3),
   asm, "\t$Zdn, $_Zdn, $Zm, $imm3",
   "",
   []>, Sched<[]> {
@@ -1245,10 +1246,17 @@
   let ElementSize = ElementSizeNone;
 }
 
-multiclass sve_fp_ftmad<string asm> {
+multiclass sve_fp_ftmad<string asm, SDPatternOperator op> {
   def _H : sve_fp_ftmad<0b01, asm, ZPR16>;
   def _S : sve_fp_ftmad<0b10, asm, ZPR32>;
   def _D : sve_fp_ftmad<0b11, asm, ZPR64>;
+
+  def : Pat<(nxv8f16 (op (nxv8f16 ZPR16:$Zn), (nxv8f16 ZPR16:$Zm), (i32 imm32_0_7:$imm))),
+            (!cast<Instruction>(NAME # _H) ZPR16:$Zn, ZPR16:$Zm, imm32_0_7:$imm)>;
+  def : Pat<(nxv4f32 (op (nxv4f32 ZPR32:$Zn), (nxv4f32 ZPR32:$Zm), (i32 imm32_0_7:$imm))),
+            (!cast<Instruction>(NAME # _S) ZPR32:$Zn, ZPR32:$Zm, imm32_0_7:$imm)>;
+  def : Pat<(nxv2f64 (op (nxv2f64 ZPR64:$Zn), (nxv2f64 ZPR64:$Zm), (i32 imm32_0_7:$imm))),
+            (!cast<Instruction>(NAME # _D) ZPR64:$Zn, ZPR64:$Zm, imm32_0_7:$imm)>;
 }
 
@@ -1323,10 +1331,14 @@
   let ElementSize = zprty.ElementSize;
 }
 
-multiclass sve_fp_3op_p_zds_a<bits<2> opc, string asm> {
+multiclass sve_fp_3op_p_zds_a<bits<2> opc, string asm, SDPatternOperator op> {
   def _H : sve_fp_3op_p_zds_a<0b01, opc, asm, ZPR16>;
   def _S : sve_fp_3op_p_zds_a<0b10, opc, asm, ZPR32>;
   def _D : sve_fp_3op_p_zds_a<0b11, opc, asm, ZPR64>;
+
+  def : SVE_4_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_4_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_4_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
 }
 
 class sve_fp_3op_p_zds_b<bits<2> sz, bits<2> opc, string asm,
@@ -1354,10 +1366,14 @@
   let ElementSize = zprty.ElementSize;
 }
 
-multiclass sve_fp_3op_p_zds_b<bits<2> opc, string asm> {
+multiclass sve_fp_3op_p_zds_b<bits<2> opc, string asm, SDPatternOperator op> {
   def _H : sve_fp_3op_p_zds_b<0b01, opc, asm, ZPR16>;
   def _S : sve_fp_3op_p_zds_b<0b10, opc, asm, ZPR32>;
   def _D : sve_fp_3op_p_zds_b<0b11, opc, asm, ZPR64>;
+
+  def : SVE_4_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_4_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_4_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1384,26 +1400,34 @@
   let ElementSize = ElementSizeNone;
 }
 
-multiclass sve_fp_fma_by_indexed_elem<bit opc, string asm> {
-  def _H : sve_fp_fma_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR3b16, VectorIndexH> {
+multiclass sve_fp_fma_by_indexed_elem<bit opc, string asm,
+                                      SDPatternOperator op> {
+  def _H : sve_fp_fma_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR3b16, VectorIndexH32b> {
    bits<3> Zm;
    bits<3> iop;
    let Inst{22} = iop{2};
    let Inst{20-19} = iop{1-0};
    let Inst{18-16} = Zm;
  }
-  def _S : sve_fp_fma_by_indexed_elem<0b10, opc, asm, ZPR32, ZPR3b32, VectorIndexS> {
+  def _S : sve_fp_fma_by_indexed_elem<0b10, opc, asm, ZPR32, ZPR3b32, VectorIndexS32b> {
    bits<3> Zm;
    bits<2> iop;
    let Inst{20-19} = iop;
    let Inst{18-16} = Zm;
  }
-  def _D : sve_fp_fma_by_indexed_elem<0b11, opc, asm, ZPR64, ZPR4b64, VectorIndexD> {
+  def _D : sve_fp_fma_by_indexed_elem<0b11, opc, asm, ZPR64, ZPR4b64, VectorIndexD32b> {
    bits<4> Zm;
    bit iop;
    let Inst{20} = iop;
    let Inst{19-16} = Zm;
  }
+
+  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexH32b:$idx))),
+            (!cast<Instruction>(NAME # _H) $Op1, $Op2, $Op3, VectorIndexH32b:$idx)>;
+  def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexS32b:$idx))),
+            (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, VectorIndexS32b:$idx)>;
+  def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, (i32 VectorIndexD32b:$idx))),
+            (!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b:$idx)>;
 }
 
@@ -1425,26 +1449,33 @@
   let Inst{4-0} = Zd;
 }
 
-multiclass sve_fp_fmul_by_indexed_elem<string asm> {
-  def _H : sve_fp_fmul_by_indexed_elem<{0, ?}, asm, ZPR16, ZPR3b16, VectorIndexH> {
+multiclass sve_fp_fmul_by_indexed_elem<string asm, SDPatternOperator op> {
+  def _H : sve_fp_fmul_by_indexed_elem<{0, ?}, asm, ZPR16, ZPR3b16, VectorIndexH32b> {
    bits<3> Zm;
    bits<3> iop;
    let Inst{22} = iop{2};
    let Inst{20-19} = iop{1-0};
    let Inst{18-16} = Zm;
  }
-  def _S : sve_fp_fmul_by_indexed_elem<0b10, asm, ZPR32, ZPR3b32, VectorIndexS> {
+  def _S : sve_fp_fmul_by_indexed_elem<0b10, asm, ZPR32, ZPR3b32, VectorIndexS32b> {
    bits<3> Zm;
    bits<2> iop;
    let Inst{20-19} = iop;
    let Inst{18-16} = Zm;
  }
-  def _D : sve_fp_fmul_by_indexed_elem<0b11, asm, ZPR64, ZPR4b64, VectorIndexD> {
+  def _D : sve_fp_fmul_by_indexed_elem<0b11, asm, ZPR64, ZPR4b64, VectorIndexD32b> {
    bits<4> Zm;
    bit iop;
    let Inst{20} = iop;
    let Inst{19-16} = Zm;
  }
+
+  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, (i32 VectorIndexH32b:$idx))),
+            (!cast<Instruction>(NAME # _H) $Op1, $Op2, VectorIndexH32b:$idx)>;
+  def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, (i32 VectorIndexS32b:$idx))),
+            (!cast<Instruction>(NAME # _S) $Op1, $Op2, VectorIndexS32b:$idx)>;
+  def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, (i32 VectorIndexD32b:$idx))),
+            (!cast<Instruction>(NAME # _D) $Op1, $Op2, VectorIndexD32b:$idx)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1476,10 +1507,17 @@
   let ElementSize = zprty.ElementSize;
 }
 
-multiclass sve_fp_fcmla<string asm> {
+multiclass sve_fp_fcmla<string asm, SDPatternOperator op> {
   def _H : sve_fp_fcmla<0b01, asm, ZPR16>;
   def _S : sve_fp_fcmla<0b10, asm, ZPR32>;
   def _D : sve_fp_fcmla<0b11, asm, ZPR64>;
+
+  def : Pat<(nxv8f16 (op nxv8i1:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, nxv8f16:$Op4, (i32 complexrotateop:$imm))),
+            (!cast<Instruction>(NAME # _H) $Op1, $Op2, $Op3, $Op4, complexrotateop:$imm)>;
+  def : Pat<(nxv4f32 (op nxv4i1:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, nxv4f32:$Op4, (i32 complexrotateop:$imm))),
+            (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, $Op4, complexrotateop:$imm)>;
+  def : Pat<(nxv2f64 (op nxv2i1:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, nxv2f64:$Op4, (i32 complexrotateop:$imm))),
+            (!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, $Op4, complexrotateop:$imm)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1509,19 +1547,24 @@
   let ElementSize = ElementSizeNone;
 }
 
-multiclass sve_fp_fcmla_by_indexed_elem<string asm> {
-  def _H : sve_fp_fcmla_by_indexed_elem<0b10, asm, ZPR16, ZPR3b16, VectorIndexS> {
+multiclass sve_fp_fcmla_by_indexed_elem<string asm, SDPatternOperator op> {
+  def _H : sve_fp_fcmla_by_indexed_elem<0b10, asm, ZPR16, ZPR3b16, VectorIndexS32b> {
    bits<3> Zm;
    bits<2> iop;
    let Inst{20-19} = iop;
    let Inst{18-16} = Zm;
  }
-  def _S : sve_fp_fcmla_by_indexed_elem<0b11, asm, ZPR32, ZPR4b32, VectorIndexD> {
+  def _S : sve_fp_fcmla_by_indexed_elem<0b11, asm, ZPR32, ZPR4b32, VectorIndexD32b> {
    bits<4> Zm;
    bits<1> iop;
    let Inst{20} = iop;
    let Inst{19-16} = Zm;
  }
+
+  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexS32b:$idx), (i32 complexrotateop:$imm))),
+            (!cast<Instruction>(NAME # _H) $Op1, $Op2, $Op3, VectorIndexS32b:$idx,
complexrotateop:$imm)>; + def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexD32b:$idx), (i32 complexrotateop:$imm))), + (!cast(NAME # _S) $Op1, $Op2, $Op3, VectorIndexD32b:$idx, complexrotateop:$imm)>; } //===----------------------------------------------------------------------===// @@ -1552,10 +1595,17 @@ let ElementSize = zprty.ElementSize; } -multiclass sve_fp_fcadd { +multiclass sve_fp_fcadd { def _H : sve_fp_fcadd<0b01, asm, ZPR16>; def _S : sve_fp_fcadd<0b10, asm, ZPR32>; def _D : sve_fp_fcadd<0b11, asm, ZPR64>; + + def : Pat<(nxv8f16 (op nxv8i1:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 complexrotateopodd:$imm))), + (!cast(NAME # _H) $Op1, $Op2, $Op3, complexrotateopodd:$imm)>; + def : Pat<(nxv4f32 (op nxv4i1:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 complexrotateopodd:$imm))), + (!cast(NAME # _S) $Op1, $Op2, $Op3, complexrotateopodd:$imm)>; + def : Pat<(nxv2f64 (op nxv2i1:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, (i32 complexrotateopodd:$imm))), + (!cast(NAME # _D) $Op1, $Op2, $Op3, complexrotateopodd:$imm)>; } //===----------------------------------------------------------------------===// @@ -5646,10 +5696,14 @@ let Inst{4-0} = Zd; } -multiclass sve_int_bin_cons_misc_0_b { +multiclass sve_int_bin_cons_misc_0_b { def _H : sve_int_bin_cons_misc_0_b<0b01, asm, ZPR16>; def _S : sve_int_bin_cons_misc_0_b<0b10, asm, ZPR32>; def _D : sve_int_bin_cons_misc_0_b<0b11, asm, ZPR64>; + + def : SVE_2_Op_Pat(NAME # _H)>; + def : SVE_2_Op_Pat(NAME # _S)>; + def : SVE_2_Op_Pat(NAME # _D)>; } class sve_int_bin_cons_misc_0_c opc, string asm, ZPRRegOp zprty> Index: llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll +++ llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll @@ -69,6 +69,111 @@ } ; +; FCADD +; + +define @fcadd_h( %pg, %a, %b) { +; CHECK-LABEL: fcadd_h: +; CHECK: fcadd z0.h, p0/m, z0.h, z1.h, #90 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcadd.nxv8f16( %pg, + %a, + %b, + i32 90) + ret %out +} + +define @fcadd_s( %pg, %a, %b) { +; CHECK-LABEL: fcadd_s: +; CHECK: fcadd z0.s, p0/m, z0.s, z1.s, #270 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcadd.nxv4f32( %pg, + %a, + %b, + i32 270) + ret %out +} + +define @fcadd_d( %pg, %a, %b) { +; CHECK-LABEL: fcadd_d: +; CHECK: fcadd z0.d, p0/m, z0.d, z1.d, #90 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcadd.nxv2f64( %pg, + %a, + %b, + i32 90) + ret %out +} + +; +; FCMLA +; + +define @fcmla_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fcmla_h: +; CHECK: fcmla z0.h, p0/m, z1.h, z2.h, #90 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcmla.nxv8f16( %pg, + %a, + %b, + %c, + i32 90) + ret %out +} + +define @fcmla_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fcmla_s: +; CHECK: fcmla z0.s, p0/m, z1.s, z2.s, #180 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcmla.nxv4f32( %pg, + %a, + %b, + %c, + i32 180) + ret %out +} + +define @fcmla_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fcmla_d: +; CHECK: fcmla z0.d, p0/m, z1.d, z2.d, #270 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcmla.nxv2f64( %pg, + %a, + %b, + %c, + i32 270) + ret %out +} + +; +; FCMLA (Indexed) +; + +define @fcmla_lane_h( %a, %b, %c) { +; CHECK-LABEL: fcmla_lane_h: +; CHECK: fcmla z0.h, z1.h, z2.h[3], #0 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcmla.lane.nxv8f16( %a, + %b, + %c, + i32 3, + i32 0) + ret %out +} + +define @fcmla_lane_s( %a, %b, %c) { +; CHECK-LABEL: fcmla_lane_s: +; CHECK: fcmla z0.s, z1.s, z2.s[1], #90 
+; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcmla.lane.nxv4f32( %a, + %b, + %c, + i32 1, + i32 90) + ret %out +} + +; ; FDIV ; @@ -137,6 +242,43 @@ } ; +; FMAD +; + +define @fmad_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fmad_h: +; CHECK: fmad z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmad.nxv8f16( %pg, + %a, + %b, + %c) + ret %out +} + +define @fmad_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fmad_s: +; CHECK: fmad z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmad.nxv4f32( %pg, + %a, + %b, + %c) + ret %out +} + +define @fmad_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fmad_d: +; CHECK: fmad z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmad.nxv2f64( %pg, + %a, + %b, + %c) + ret %out +} + +; ; FMAX ; @@ -273,6 +415,191 @@ } ; +; FMLA +; + +define @fmla_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fmla_h: +; CHECK: fmla z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmla.nxv8f16( %pg, + %a, + %b, + %c) + ret %out +} + +define @fmla_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fmla_s: +; CHECK: fmla z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmla.nxv4f32( %pg, + %a, + %b, + %c) + ret %out +} + +define @fmla_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fmla_d: +; CHECK: fmla z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmla.nxv2f64( %pg, + %a, + %b, + %c) + ret %out +} + +; +; FMLA (Indexed) +; + +define @fmla_lane_h( %a, %b, %c) { +; CHECK-LABEL: fmla_lane_h: +; CHECK: fmla z0.h, z1.h, z2.h[3] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmla.lane.nxv8f16( %a, + %b, + %c, + i32 3) + ret %out +} + +define @fmla_lane_s( %a, %b, %c) { +; CHECK-LABEL: fmla_lane_s: +; CHECK: fmla z0.s, z1.s, z2.s[2] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmla.lane.nxv4f32( %a, + %b, + %c, + i32 2) + ret %out +} + +define @fmla_lane_d( %a, %b, %c) { +; CHECK-LABEL: fmla_lane_d: +; CHECK: fmla z0.d, z1.d, z2.d[1] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmla.lane.nxv2f64( %a, + %b, + %c, + i32 1) + ret %out +} + +; +; FMLS +; + +define @fmls_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fmls_h: +; CHECK: fmls z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmls.nxv8f16( %pg, + %a, + %b, + %c) + ret %out +} + +define @fmls_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fmls_s: +; CHECK: fmls z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmls.nxv4f32( %pg, + %a, + %b, + %c) + ret %out +} + +define @fmls_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fmls_d: +; CHECK: fmls z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmls.nxv2f64( %pg, + %a, + %b, + %c) + ret %out +} + +; +; FMLS (Indexed) +; + +define @fmls_lane_h( %a, %b, %c) { +; CHECK-LABEL: fmls_lane_h: +; CHECK: fmls z0.h, z1.h, z2.h[3] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmls.lane.nxv8f16( %a, + %b, + %c, + i32 3) + ret %out +} + +define @fmls_lane_s( %a, %b, %c) { +; CHECK-LABEL: fmls_lane_s: +; CHECK: fmls z0.s, z1.s, z2.s[2] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmls.lane.nxv4f32( %a, + %b, + %c, + i32 2) + ret %out +} + +define @fmls_lane_d( %a, %b, %c) { +; CHECK-LABEL: fmls_lane_d: +; CHECK: fmls z0.d, z1.d, z2.d[1] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmls.lane.nxv2f64( %a, + %b, + %c, + i32 1) + ret %out +} + +; +; FMSB +; + +define @fmsb_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fmsb_h: +; CHECK: fmsb z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmsb.nxv8f16( 
%pg, + %a, + %b, + %c) + ret %out +} + +define @fmsb_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fmsb_s: +; CHECK: fmsb z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmsb.nxv4f32( %pg, + %a, + %b, + %c) + ret %out +} + +define @fmsb_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fmsb_d: +; CHECK: fmsb z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmsb.nxv2f64( %pg, + %a, + %b, + %c) + ret %out +} + +; ; FMUL ; @@ -307,6 +634,40 @@ } ; +; FMUL (Indexed) +; + +define @fmul_lane_h( %a, %b) { +; CHECK-LABEL: fmul_lane_h: +; CHECK: fmul z0.h, z0.h, z1.h[3] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmul.lane.nxv8f16( %a, + %b, + i32 3) + ret %out +} + +define @fmul_lane_s( %a, %b) { +; CHECK-LABEL: fmul_lane_s: +; CHECK: fmul z0.s, z0.s, z1.s[2] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmul.lane.nxv4f32( %a, + %b, + i32 2) + ret %out +} + +define @fmul_lane_d( %a, %b) { +; CHECK-LABEL: fmul_lane_d: +; CHECK: fmul z0.d, z0.d, z1.d[1] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmul.lane.nxv2f64( %a, + %b, + i32 1) + ret %out +} + +; ; FMULX ; @@ -375,6 +736,154 @@ } ; +; FNMAD +; + +define @fnmad_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmad_h: +; CHECK: fnmad z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmad.nxv8f16( %pg, + %a, + %b, + %c) + ret %out +} + +define @fnmad_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmad_s: +; CHECK: fnmad z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmad.nxv4f32( %pg, + %a, + %b, + %c) + ret %out +} + +define @fnmad_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmad_d: +; CHECK: fnmad z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmad.nxv2f64( %pg, + %a, + %b, + %c) + ret %out +} + +; +; FNMLA +; + +define @fnmla_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmla_h: +; CHECK: fnmla z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmla.nxv8f16( %pg, + %a, + %b, + %c) + ret %out +} + +define @fnmla_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmla_s: +; CHECK: fnmla z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmla.nxv4f32( %pg, + %a, + %b, + %c) + ret %out +} + +define @fnmla_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmla_d: +; CHECK: fnmla z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmla.nxv2f64( %pg, + %a, + %b, + %c) + ret %out +} + +; +; FNMLS +; + +define @fnmls_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmls_h: +; CHECK: fnmls z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmls.nxv8f16( %pg, + %a, + %b, + %c) + ret %out +} + +define @fnmls_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmls_s: +; CHECK: fnmls z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmls.nxv4f32( %pg, + %a, + %b, + %c) + ret %out +} + +define @fnmls_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmls_d: +; CHECK: fnmls z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmls.nxv2f64( %pg, + %a, + %b, + %c) + ret %out +} + +; +; FNMSB +; + +define @fnmsb_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmsb_h: +; CHECK: fnmsb z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmsb.nxv8f16( %pg, + %a, + %b, + %c) + ret %out +} + +define @fnmsb_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmsb_s: +; CHECK: fnmsb z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmsb.nxv4f32( %pg, + %a, + %b, + %c) + ret %out +} + +define @fnmsb_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmsb_d: +; CHECK: fnmsb 
z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmsb.nxv2f64( %pg, + %a, + %b, + %c) + ret %out +} + +; ; FSUB ; @@ -443,6 +952,40 @@ } ; +; FTMAD +; + +define @ftmad_h( %a, %b) { +; CHECK-LABEL: ftmad_h: +; CHECK: ftmad z0.h, z0.h, z1.h, #0 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ftmad.x.nxv8f16( %a, + %b, + i32 0) + ret %out +} + +define @ftmad_s( %a, %b) { +; CHECK-LABEL: ftmad_s: +; CHECK: ftmad z0.s, z0.s, z1.s, #0 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ftmad.x.nxv4f32( %a, + %b, + i32 0) + ret %out +} + +define @ftmad_d( %a, %b) { +; CHECK-LABEL: ftmad_d: +; CHECK: ftmad z0.d, z0.d, z1.d, #7 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ftmad.x.nxv2f64( %a, + %b, + i32 7) + ret %out +} + +; ; FTSMUL ; @@ -473,6 +1016,37 @@ ret %out } +; +; FTSSEL +; + +define @ftssel_h( %a, %b) { +; CHECK-LABEL: ftssel_h: +; CHECK: ftssel z0.h, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ftssel.x.nxv8f16( %a, + %b) + ret %out +} + +define @ftssel_s( %a, %b) { +; CHECK-LABEL: ftssel_s: +; CHECK: ftssel z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ftssel.x.nxv4f32( %a, + %b) + ret %out +} + +define @ftssel_d( %a, %b) { +; CHECK-LABEL: ftssel_d: +; CHECK: ftssel z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ftssel.x.nxv2f64( %a, + %b) + ret %out +} + declare @llvm.aarch64.sve.fabd.nxv8f16(, , ) declare @llvm.aarch64.sve.fabd.nxv4f32(, , ) declare @llvm.aarch64.sve.fabd.nxv2f64(, , ) @@ -481,6 +1055,17 @@ declare @llvm.aarch64.sve.fadd.nxv4f32(, , ) declare @llvm.aarch64.sve.fadd.nxv2f64(, , ) +declare @llvm.aarch64.sve.fcadd.nxv8f16(, , , i32) +declare @llvm.aarch64.sve.fcadd.nxv4f32(, , , i32) +declare @llvm.aarch64.sve.fcadd.nxv2f64(, , , i32) + +declare @llvm.aarch64.sve.fcmla.nxv8f16(, , , , i32) +declare @llvm.aarch64.sve.fcmla.nxv4f32(, , , , i32) +declare @llvm.aarch64.sve.fcmla.nxv2f64(, , , , i32) + +declare @llvm.aarch64.sve.fcmla.lane.nxv8f16(, , , i32, i32) +declare @llvm.aarch64.sve.fcmla.lane.nxv4f32(, , , i32, i32) + declare @llvm.aarch64.sve.fdiv.nxv8f16(, , ) declare @llvm.aarch64.sve.fdiv.nxv4f32(, , ) declare @llvm.aarch64.sve.fdiv.nxv2f64(, , ) @@ -489,6 +1074,10 @@ declare @llvm.aarch64.sve.fdivr.nxv4f32(, , ) declare @llvm.aarch64.sve.fdivr.nxv2f64(, , ) +declare @llvm.aarch64.sve.fmad.nxv8f16(, , , ) +declare @llvm.aarch64.sve.fmad.nxv4f32(, , , ) +declare @llvm.aarch64.sve.fmad.nxv2f64(, , , ) + declare @llvm.aarch64.sve.fmax.nxv8f16(, , ) declare @llvm.aarch64.sve.fmax.nxv4f32(, , ) declare @llvm.aarch64.sve.fmax.nxv2f64(, , ) @@ -505,14 +1094,54 @@ declare @llvm.aarch64.sve.fminnm.nxv4f32(, , ) declare @llvm.aarch64.sve.fminnm.nxv2f64(, , ) +declare @llvm.aarch64.sve.fmla.nxv8f16(, , , ) +declare @llvm.aarch64.sve.fmla.nxv4f32(, , , ) +declare @llvm.aarch64.sve.fmla.nxv2f64(, , , ) + +declare @llvm.aarch64.sve.fmla.lane.nxv8f16(, , , i32) +declare @llvm.aarch64.sve.fmla.lane.nxv4f32(, , , i32) +declare @llvm.aarch64.sve.fmla.lane.nxv2f64(, , , i32) + +declare @llvm.aarch64.sve.fmls.nxv8f16(, , , ) +declare @llvm.aarch64.sve.fmls.nxv4f32(, , , ) +declare @llvm.aarch64.sve.fmls.nxv2f64(, , , ) + +declare @llvm.aarch64.sve.fmls.lane.nxv8f16(, , , i32) +declare @llvm.aarch64.sve.fmls.lane.nxv4f32(, , , i32) +declare @llvm.aarch64.sve.fmls.lane.nxv2f64(, , , i32) + +declare @llvm.aarch64.sve.fmsb.nxv8f16(, , , ) +declare @llvm.aarch64.sve.fmsb.nxv4f32(, , , ) +declare @llvm.aarch64.sve.fmsb.nxv2f64(, , , ) + declare @llvm.aarch64.sve.fmul.nxv8f16(, , ) declare 
@llvm.aarch64.sve.fmul.nxv4f32(, , ) declare @llvm.aarch64.sve.fmul.nxv2f64(, , ) +declare @llvm.aarch64.sve.fmul.lane.nxv8f16(, , i32) +declare @llvm.aarch64.sve.fmul.lane.nxv4f32(, , i32) +declare @llvm.aarch64.sve.fmul.lane.nxv2f64(, , i32) + declare @llvm.aarch64.sve.fmulx.nxv8f16(, , ) declare @llvm.aarch64.sve.fmulx.nxv4f32(, , ) declare @llvm.aarch64.sve.fmulx.nxv2f64(, , ) +declare @llvm.aarch64.sve.fnmad.nxv8f16(, , , ) +declare @llvm.aarch64.sve.fnmad.nxv4f32(, , , ) +declare @llvm.aarch64.sve.fnmad.nxv2f64(, , , ) + +declare @llvm.aarch64.sve.fnmla.nxv8f16(, , , ) +declare @llvm.aarch64.sve.fnmla.nxv4f32(, , , ) +declare @llvm.aarch64.sve.fnmla.nxv2f64(, , , ) + +declare @llvm.aarch64.sve.fnmls.nxv8f16(, , , ) +declare @llvm.aarch64.sve.fnmls.nxv4f32(, , , ) +declare @llvm.aarch64.sve.fnmls.nxv2f64(, , , ) + +declare @llvm.aarch64.sve.fnmsb.nxv8f16(, , , ) +declare @llvm.aarch64.sve.fnmsb.nxv4f32(, , , ) +declare @llvm.aarch64.sve.fnmsb.nxv2f64(, , , ) + declare @llvm.aarch64.sve.fscale.nxv8f16(, , ) declare @llvm.aarch64.sve.fscale.nxv4f32(, , ) declare @llvm.aarch64.sve.fscale.nxv2f64(, , ) @@ -525,6 +1154,14 @@ declare @llvm.aarch64.sve.fsubr.nxv4f32(, , ) declare @llvm.aarch64.sve.fsubr.nxv2f64(, , ) +declare @llvm.aarch64.sve.ftmad.x.nxv8f16(, , i32) +declare @llvm.aarch64.sve.ftmad.x.nxv4f32(, , i32) +declare @llvm.aarch64.sve.ftmad.x.nxv2f64(, , i32) + declare @llvm.aarch64.sve.ftsmul.x.nxv8f16(, ) declare @llvm.aarch64.sve.ftsmul.x.nxv4f32(, ) declare @llvm.aarch64.sve.ftsmul.x.nxv2f64(, ) + +declare @llvm.aarch64.sve.ftssel.x.nxv8f16(, ) +declare @llvm.aarch64.sve.ftssel.x.nxv4f32(, ) +declare @llvm.aarch64.sve.ftssel.x.nxv2f64(, )
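Illustrative usage, not part of the patch: one of the predicated FMA tests above written out as a standalone file, with the scalable-vector types spelled out in full. The RUN line is not included in the hunks above, so treat it as an assumption about how this test file is normally driven.

; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

define <vscale x 4 x float> @fmla_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
; CHECK-LABEL: fmla_s:
; CHECK: fmla z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x float> %a,
                                                                  <vscale x 4 x float> %b,
                                                                  <vscale x 4 x float> %c)
  ret <vscale x 4 x float> %out
}

declare <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)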