diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -824,6 +824,11 @@ llvm_i32_ty], [IntrNoMem]>; + class AdvSIMD_SVE_EXPA_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMVectorOfBitcastsToInt<0>], + [IntrNoMem]>; + class AdvSIMD_SVE_PUNPKHI_Intrinsic : Intrinsic<[LLVMHalfElementsVectorType<0>], [llvm_anyvector_ty], @@ -966,12 +971,14 @@ // def int_aarch64_sve_fabd : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_fabs : AdvSIMD_Merged1VectorArg_Intrinsic; def int_aarch64_sve_fadd : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_fcadd : AdvSIMD_SVE_CADD_Intrinsic; def int_aarch64_sve_fcmla : AdvSIMD_SVE_CMLA_Intrinsic; def int_aarch64_sve_fcmla_lane : AdvSIMD_SVE_CMLA_LANE_Intrinsic; def int_aarch64_sve_fdiv : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_fdivr : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_fexpa_x : AdvSIMD_SVE_EXPA_Intrinsic; def int_aarch64_sve_fmad : AdvSIMD_Pred3VectorArg_Intrinsic; def int_aarch64_sve_fmax : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_fmaxnm : AdvSIMD_Pred2VectorArg_Intrinsic; @@ -984,12 +991,26 @@ def int_aarch64_sve_fmsb : AdvSIMD_Pred3VectorArg_Intrinsic; def int_aarch64_sve_fmul : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_fmulx : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_fneg : AdvSIMD_Merged1VectorArg_Intrinsic; def int_aarch64_sve_fmul_lane : AdvSIMD_2VectorArgIndexed_Intrinsic; def int_aarch64_sve_fnmad : AdvSIMD_Pred3VectorArg_Intrinsic; def int_aarch64_sve_fnmla : AdvSIMD_Pred3VectorArg_Intrinsic; def int_aarch64_sve_fnmls : AdvSIMD_Pred3VectorArg_Intrinsic; def int_aarch64_sve_fnmsb : AdvSIMD_Pred3VectorArg_Intrinsic; +def int_aarch64_sve_frecpe_x : AdvSIMD_1VectorArg_Intrinsic; +def int_aarch64_sve_frecps_x : AdvSIMD_2VectorArg_Intrinsic; +def int_aarch64_sve_frecpx : AdvSIMD_Merged1VectorArg_Intrinsic; +def int_aarch64_sve_frinta : AdvSIMD_Merged1VectorArg_Intrinsic; +def int_aarch64_sve_frinti : AdvSIMD_Merged1VectorArg_Intrinsic; +def int_aarch64_sve_frintm : AdvSIMD_Merged1VectorArg_Intrinsic; +def int_aarch64_sve_frintn : AdvSIMD_Merged1VectorArg_Intrinsic; +def int_aarch64_sve_frintp : AdvSIMD_Merged1VectorArg_Intrinsic; +def int_aarch64_sve_frintx : AdvSIMD_Merged1VectorArg_Intrinsic; +def int_aarch64_sve_frintz : AdvSIMD_Merged1VectorArg_Intrinsic; +def int_aarch64_sve_frsqrte_x : AdvSIMD_1VectorArg_Intrinsic; +def int_aarch64_sve_frsqrts_x : AdvSIMD_2VectorArg_Intrinsic; def int_aarch64_sve_fscale : AdvSIMD_SVE_SCALE_Intrinsic; +def int_aarch64_sve_fsqrt : AdvSIMD_Merged1VectorArg_Intrinsic; def int_aarch64_sve_fsub : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_fsubr : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_ftmad_x : AdvSIMD_2VectorArgIndexed_Intrinsic; @@ -1003,12 +1024,6 @@ def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>; // -// Floating-point arithmetic -// -def int_aarch64_sve_frecps_x: AdvSIMD_2VectorArg_Intrinsic; -def int_aarch64_sve_frsqrts_x: AdvSIMD_2VectorArg_Intrinsic; - -// // Predicate operations // diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -102,8 +102,8 @@ defm CNT_ZPmZ : sve_int_un_pred_arit_1< 0b010, "cnt", int_aarch64_sve_cnt>; defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot", int_aarch64_sve_cnot>; defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not", int_aarch64_sve_not>; - defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs">; - defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg">; + defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", int_aarch64_sve_fabs>; + defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", int_aarch64_sve_fneg>; defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", int_aarch64_sve_smax>; defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", int_aarch64_sve_umax>; @@ -112,8 +112,8 @@ defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", int_aarch64_sve_sabd>; defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", int_aarch64_sve_uabd>; - defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe">; - defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte">; + defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe", int_aarch64_sve_frecpe_x>; + defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", int_aarch64_sve_frsqrte_x>; defm FADD_ZPmI : sve_fp_2op_i_p_zds<0b000, "fadd", sve_fpimm_half_one>; defm FSUB_ZPmI : sve_fp_2op_i_p_zds<0b001, "fsub", sve_fpimm_half_one>; @@ -222,9 +222,7 @@ defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">; defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">; def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>; - def FEXPA_ZZ_H : sve_int_bin_cons_misc_0_c<0b01000000, "fexpa", ZPR16>; - def FEXPA_ZZ_S : sve_int_bin_cons_misc_0_c<0b10000000, "fexpa", ZPR32>; - def FEXPA_ZZ_D : sve_int_bin_cons_misc_0_c<0b11000000, "fexpa", ZPR64>; + defm FEXPA_ZZ : sve_int_bin_cons_misc_0_c_fexpa<"fexpa", int_aarch64_sve_fexpa_x>; def BRKPA_PPzPP : sve_int_brkp<0b00, "brkpa">; def BRKPAS_PPzPP : sve_int_brkp<0b10, "brkpas">; @@ -926,15 +924,15 @@ def FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, ElementSizeD>; def FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, ElementSizeD>; - defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn">; - defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp">; - defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm">; - defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz">; - defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta">; - defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx">; - defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti">; - defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx">; - defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt">; + defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", int_aarch64_sve_frintn>; + defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", int_aarch64_sve_frintp>; + defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", int_aarch64_sve_frintm>; + defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", int_aarch64_sve_frintz>; + defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", int_aarch64_sve_frinta>; + defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", int_aarch64_sve_frintx>; + defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", int_aarch64_sve_frinti>; + defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", int_aarch64_sve_frecpx>; + defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", int_aarch64_sve_fsqrt>; // InstAliases def : InstAlias<"mov $Zd, $Zn", diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -1830,10 +1830,14 @@ let ElementSize = size; } -multiclass sve_fp_2op_p_zd_HSD opc, string asm> { +multiclass sve_fp_2op_p_zd_HSD opc, string asm, SDPatternOperator op> { def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>; def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>; def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>; + + def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; } multiclass sve2_fp_flogb { @@ -1863,10 +1867,14 @@ let Inst{4-0} = Zd; } -multiclass sve_fp_2op_u_zd opc, string asm> { +multiclass sve_fp_2op_u_zd opc, string asm, SDPatternOperator op> { def _H : sve_fp_2op_u_zd<0b01, opc, asm, ZPR16>; def _S : sve_fp_2op_u_zd<0b10, opc, asm, ZPR32>; def _D : sve_fp_2op_u_zd<0b11, opc, asm, ZPR64>; + + def : SVE_1_Op_Pat(NAME # _H)>; + def : SVE_1_Op_Pat(NAME # _S)>; + def : SVE_1_Op_Pat(NAME # _D)>; } //===----------------------------------------------------------------------===// @@ -3048,10 +3056,15 @@ def : SVE_3_Op_Pat(NAME # _D)>; } -multiclass sve_int_un_pred_arit_1_fp opc, string asm> { +multiclass sve_int_un_pred_arit_1_fp opc, string asm, + SDPatternOperator op> { def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>; def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>; def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>; + + def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; } //===----------------------------------------------------------------------===// @@ -5723,6 +5736,16 @@ let Inst{4-0} = Zd; } +multiclass sve_int_bin_cons_misc_0_c_fexpa { + def _H : sve_int_bin_cons_misc_0_c<0b01000000, asm, ZPR16>; + def _S : sve_int_bin_cons_misc_0_c<0b10000000, asm, ZPR32>; + def _D : sve_int_bin_cons_misc_0_c<0b11000000, asm, ZPR64>; + + def : SVE_1_Op_Pat(NAME # _H)>; + def : SVE_1_Op_Pat(NAME # _S)>; + def : SVE_1_Op_Pat(NAME # _D)>; +} + //===----------------------------------------------------------------------===// // SVE Integer Reduction Group //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll @@ -35,6 +35,40 @@ } ; +; FABS +; + +define @fabs_h( %a, %pg, %b) { +; CHECK-LABEL: fabs_h: +; CHECK: fabs z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fabs.nxv8f16( %a, + %pg, + %b) + ret %out +} + +define @fabs_s( %a, %pg, %b) { +; CHECK-LABEL: fabs_s: +; CHECK: fabs z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fabs.nxv4f32( %a, + %pg, + %b) + ret %out +} + +define @fabs_d( %a, %pg, %b) { +; CHECK-LABEL: fabs_d: +; CHECK: fabs z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fabs.nxv2f64( %a, + %pg, + %b) + ret %out +} + +; ; FADD ; @@ -242,6 +276,34 @@ } ; +; FEXPA +; + +define @fexpa_h( %a) { +; CHECK-LABEL: fexpa_h: +; CHECK: fexpa z0.h, z0.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fexpa.x.nxv8f16( %a) + ret %out +} + +define @fexpa_s( %a) { +; CHECK-LABEL: fexpa_s: +; CHECK: fexpa z0.s, z0.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fexpa.x.nxv4f32( %a) + ret %out +} + +define @fexpa_d( %pg, %a) { +; CHECK-LABEL: fexpa_d: +; CHECK: fexpa z0.d, z0.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fexpa.x.nxv2f64( %a) + ret %out +} + +; ; FMAD ; @@ -702,36 +764,36 @@ } ; -; FSCALE +; FNEG ; -define @fscale_h( %pg, %a, %b) { -; CHECK-LABEL: fscale_h: -; CHECK: fscale z0.h, p0/m, z0.h, z1.h +define @fneg_h( %a, %pg, %b) { +; CHECK-LABEL: fneg_h: +; CHECK: fneg z0.h, p0/m, z1.h ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.fscale.nxv8f16( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.fneg.nxv8f16( %a, + %pg, + %b) ret %out } -define @fscale_s( %pg, %a, %b) { -; CHECK-LABEL: fscale_s: -; CHECK: fscale z0.s, p0/m, z0.s, z1.s +define @fneg_s( %a, %pg, %b) { +; CHECK-LABEL: fneg_s: +; CHECK: fneg z0.s, p0/m, z1.s ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.fscale.nxv4f32( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.fneg.nxv4f32( %a, + %pg, + %b) ret %out } -define @fscale_d( %pg, %a, %b) { -; CHECK-LABEL: fscale_d: -; CHECK: fscale z0.d, p0/m, z0.d, z1.d +define @fneg_d( %a, %pg, %b) { +; CHECK-LABEL: fneg_d: +; CHECK: fneg z0.d, p0/m, z1.d ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.fscale.nxv2f64( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.fneg.nxv2f64( %a, + %pg, + %b) ret %out } @@ -884,6 +946,402 @@ } ; +; FRECPE +; + +define @frecpe_h( %a) { +; CHECK-LABEL: frecpe_h: +; CHECK: frecpe z0.h, z0.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frecpe.x.nxv8f16( %a) + ret %out +} + +define @frecpe_s( %a) { +; CHECK-LABEL: frecpe_s: +; CHECK: frecpe z0.s, z0.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frecpe.x.nxv4f32( %a) + ret %out +} + +define @frecpe_d( %pg, %a) { +; CHECK-LABEL: frecpe_d: +; CHECK: frecpe z0.d, z0.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frecpe.x.nxv2f64( %a) + ret %out +} + +; +; FRECPX +; + +define @frecpx_h( %a, %pg, %b) { +; CHECK-LABEL: frecpx_h: +; CHECK: frecpx z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frecpx.nxv8f16( %a, + %pg, + %b) + ret %out +} + +define @frecpx_s( %a, %pg, %b) { +; CHECK-LABEL: frecpx_s: +; CHECK: frecpx z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frecpx.nxv4f32( %a, + %pg, + %b) + ret %out +} + +define @frecpx_d( %a, %pg, %b) { +; CHECK-LABEL: frecpx_d: +; CHECK: frecpx z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frecpx.nxv2f64( %a, + %pg, + %b) + ret %out +} + +; +; FRINTA +; + +define @frinta_h( %a, %pg, %b) { +; CHECK-LABEL: frinta_h: +; CHECK: frinta z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frinta.nxv8f16( %a, + %pg, + %b) + ret %out +} + +define @frinta_s( %a, %pg, %b) { +; CHECK-LABEL: frinta_s: +; CHECK: frinta z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frinta.nxv4f32( %a, + %pg, + %b) + ret %out +} + +define @frinta_d( %a, %pg, %b) { +; CHECK-LABEL: frinta_d: +; CHECK: frinta z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frinta.nxv2f64( %a, + %pg, + %b) + ret %out +} + +; +; FRINTI +; + +define @frinti_h( %a, %pg, %b) { +; CHECK-LABEL: frinti_h: +; CHECK: frinti z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frinti.nxv8f16( %a, + %pg, + %b) + ret %out +} + +define @frinti_s( %a, %pg, %b) { +; CHECK-LABEL: frinti_s: +; CHECK: frinti z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frinti.nxv4f32( %a, + %pg, + %b) + ret %out +} + +define @frinti_d( %a, %pg, %b) { +; CHECK-LABEL: frinti_d: +; CHECK: frinti z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frinti.nxv2f64( %a, + %pg, + %b) + ret %out +} + +; +; FRINTM +; + +define @frintm_h( %a, %pg, %b) { +; CHECK-LABEL: frintm_h: +; CHECK: frintm z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frintm.nxv8f16( %a, + %pg, + %b) + ret %out +} + +define @frintm_s( %a, %pg, %b) { +; CHECK-LABEL: frintm_s: +; CHECK: frintm z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frintm.nxv4f32( %a, + %pg, + %b) + ret %out +} + +define @frintm_d( %a, %pg, %b) { +; CHECK-LABEL: frintm_d: +; CHECK: frintm z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frintm.nxv2f64( %a, + %pg, + %b) + ret %out +} + +; +; FRINTN +; + +define @frintn_h( %a, %pg, %b) { +; CHECK-LABEL: frintn_h: +; CHECK: frintn z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frintn.nxv8f16( %a, + %pg, + %b) + ret %out +} + +define @frintn_s( %a, %pg, %b) { +; CHECK-LABEL: frintn_s: +; CHECK: frintn z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frintn.nxv4f32( %a, + %pg, + %b) + ret %out +} + +define @frintn_d( %a, %pg, %b) { +; CHECK-LABEL: frintn_d: +; CHECK: frintn z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frintn.nxv2f64( %a, + %pg, + %b) + ret %out +} + +; +; FRINTP +; + +define @frintp_h( %a, %pg, %b) { +; CHECK-LABEL: frintp_h: +; CHECK: frintp z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frintp.nxv8f16( %a, + %pg, + %b) + ret %out +} + +define @frintp_s( %a, %pg, %b) { +; CHECK-LABEL: frintp_s: +; CHECK: frintp z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frintp.nxv4f32( %a, + %pg, + %b) + ret %out +} + +define @frintp_d( %a, %pg, %b) { +; CHECK-LABEL: frintp_d: +; CHECK: frintp z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frintp.nxv2f64( %a, + %pg, + %b) + ret %out +} + +; +; FRINTX +; + +define @frintx_h( %a, %pg, %b) { +; CHECK-LABEL: frintx_h: +; CHECK: frintx z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frintx.nxv8f16( %a, + %pg, + %b) + ret %out +} + +define @frintx_s( %a, %pg, %b) { +; CHECK-LABEL: frintx_s: +; CHECK: frintx z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frintx.nxv4f32( %a, + %pg, + %b) + ret %out +} + +define @frintx_d( %a, %pg, %b) { +; CHECK-LABEL: frintx_d: +; CHECK: frintx z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frintx.nxv2f64( %a, + %pg, + %b) + ret %out +} + +; +; FRINTZ +; + +define @frintz_h( %a, %pg, %b) { +; CHECK-LABEL: frintz_h: +; CHECK: frintz z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frintz.nxv8f16( %a, + %pg, + %b) + ret %out +} + +define @frintz_s( %a, %pg, %b) { +; CHECK-LABEL: frintz_s: +; CHECK: frintz z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frintz.nxv4f32( %a, + %pg, + %b) + ret %out +} + +define @frintz_d( %a, %pg, %b) { +; CHECK-LABEL: frintz_d: +; CHECK: frintz z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frintz.nxv2f64( %a, + %pg, + %b) + ret %out +} + +; +; FRSQRTE +; + +define @frsqrte_h( %a) { +; CHECK-LABEL: frsqrte_h: +; CHECK: frsqrte z0.h, z0.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frsqrte.x.nxv8f16( %a) + ret %out +} + +define @frsqrte_s( %a) { +; CHECK-LABEL: frsqrte_s: +; CHECK: frsqrte z0.s, z0.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frsqrte.x.nxv4f32( %a) + ret %out +} + +define @frsqrte_d( %pg, %a) { +; CHECK-LABEL: frsqrte_d: +; CHECK: frsqrte z0.d, z0.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.frsqrte.x.nxv2f64( %a) + ret %out +} + +; +; FSCALE +; + +define @fscale_h( %pg, %a, %b) { +; CHECK-LABEL: fscale_h: +; CHECK: fscale z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fscale.nxv8f16( %pg, + %a, + %b) + ret %out +} + +define @fscale_s( %pg, %a, %b) { +; CHECK-LABEL: fscale_s: +; CHECK: fscale z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fscale.nxv4f32( %pg, + %a, + %b) + ret %out +} + +define @fscale_d( %pg, %a, %b) { +; CHECK-LABEL: fscale_d: +; CHECK: fscale z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fscale.nxv2f64( %pg, + %a, + %b) + ret %out +} + +; +; FSQRT +; + +define @fsqrt_h( %a, %pg, %b) { +; CHECK-LABEL: fsqrt_h: +; CHECK: fsqrt z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fsqrt.nxv8f16( %a, + %pg, + %b) + ret %out +} + +define @fsqrt_s( %a, %pg, %b) { +; CHECK-LABEL: fsqrt_s: +; CHECK: fsqrt z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fsqrt.nxv4f32( %a, + %pg, + %b) + ret %out +} + +define @fsqrt_d( %a, %pg, %b) { +; CHECK-LABEL: fsqrt_d: +; CHECK: fsqrt z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fsqrt.nxv2f64( %a, + %pg, + %b) + ret %out +} + +; ; FSUB ; @@ -1051,6 +1509,10 @@ declare @llvm.aarch64.sve.fabd.nxv4f32(, , ) declare @llvm.aarch64.sve.fabd.nxv2f64(, , ) +declare @llvm.aarch64.sve.fabs.nxv8f16(, , ) +declare @llvm.aarch64.sve.fabs.nxv4f32(, , ) +declare @llvm.aarch64.sve.fabs.nxv2f64(, , ) + declare @llvm.aarch64.sve.fadd.nxv8f16(, , ) declare @llvm.aarch64.sve.fadd.nxv4f32(, , ) declare @llvm.aarch64.sve.fadd.nxv2f64(, , ) @@ -1074,6 +1536,10 @@ declare @llvm.aarch64.sve.fdivr.nxv4f32(, , ) declare @llvm.aarch64.sve.fdivr.nxv2f64(, , ) +declare @llvm.aarch64.sve.fexpa.x.nxv8f16() +declare @llvm.aarch64.sve.fexpa.x.nxv4f32() +declare @llvm.aarch64.sve.fexpa.x.nxv2f64() + declare @llvm.aarch64.sve.fmad.nxv8f16(, , , ) declare @llvm.aarch64.sve.fmad.nxv4f32(, , , ) declare @llvm.aarch64.sve.fmad.nxv2f64(, , , ) @@ -1126,6 +1592,10 @@ declare @llvm.aarch64.sve.fmulx.nxv4f32(, , ) declare @llvm.aarch64.sve.fmulx.nxv2f64(, , ) +declare @llvm.aarch64.sve.fneg.nxv8f16(, , ) +declare @llvm.aarch64.sve.fneg.nxv4f32(, , ) +declare @llvm.aarch64.sve.fneg.nxv2f64(, , ) + declare @llvm.aarch64.sve.fnmad.nxv8f16(, , , ) declare @llvm.aarch64.sve.fnmad.nxv4f32(, , , ) declare @llvm.aarch64.sve.fnmad.nxv2f64(, , , ) @@ -1142,10 +1612,54 @@ declare @llvm.aarch64.sve.fnmsb.nxv4f32(, , , ) declare @llvm.aarch64.sve.fnmsb.nxv2f64(, , , ) +declare @llvm.aarch64.sve.frecpe.x.nxv8f16() +declare @llvm.aarch64.sve.frecpe.x.nxv4f32() +declare @llvm.aarch64.sve.frecpe.x.nxv2f64() + +declare @llvm.aarch64.sve.frecpx.nxv8f16(, , ) +declare @llvm.aarch64.sve.frecpx.nxv4f32(, , ) +declare @llvm.aarch64.sve.frecpx.nxv2f64(, , ) + +declare @llvm.aarch64.sve.frinta.nxv8f16(, , ) +declare @llvm.aarch64.sve.frinta.nxv4f32(, , ) +declare @llvm.aarch64.sve.frinta.nxv2f64(, , ) + +declare @llvm.aarch64.sve.frinti.nxv8f16(, , ) +declare @llvm.aarch64.sve.frinti.nxv4f32(, , ) +declare @llvm.aarch64.sve.frinti.nxv2f64(, , ) + +declare @llvm.aarch64.sve.frintm.nxv8f16(, , ) +declare @llvm.aarch64.sve.frintm.nxv4f32(, , ) +declare @llvm.aarch64.sve.frintm.nxv2f64(, , ) + +declare @llvm.aarch64.sve.frintn.nxv8f16(, , ) +declare @llvm.aarch64.sve.frintn.nxv4f32(, , ) +declare @llvm.aarch64.sve.frintn.nxv2f64(, , ) + +declare @llvm.aarch64.sve.frintp.nxv8f16(, , ) +declare @llvm.aarch64.sve.frintp.nxv4f32(, , ) +declare @llvm.aarch64.sve.frintp.nxv2f64(, , ) + +declare @llvm.aarch64.sve.frintx.nxv8f16(, , ) +declare @llvm.aarch64.sve.frintx.nxv4f32(, , ) +declare @llvm.aarch64.sve.frintx.nxv2f64(, , ) + +declare @llvm.aarch64.sve.frintz.nxv8f16(, , ) +declare @llvm.aarch64.sve.frintz.nxv4f32(, , ) +declare @llvm.aarch64.sve.frintz.nxv2f64(, , ) + +declare @llvm.aarch64.sve.frsqrte.x.nxv8f16() +declare @llvm.aarch64.sve.frsqrte.x.nxv4f32() +declare @llvm.aarch64.sve.frsqrte.x.nxv2f64() + declare @llvm.aarch64.sve.fscale.nxv8f16(, , ) declare @llvm.aarch64.sve.fscale.nxv4f32(, , ) declare @llvm.aarch64.sve.fscale.nxv2f64(, , ) +declare @llvm.aarch64.sve.fsqrt.nxv8f16(, , ) +declare @llvm.aarch64.sve.fsqrt.nxv4f32(, , ) +declare @llvm.aarch64.sve.fsqrt.nxv2f64(, , ) + declare @llvm.aarch64.sve.fsub.nxv8f16(, , ) declare @llvm.aarch64.sve.fsub.nxv4f32(, , ) declare @llvm.aarch64.sve.fsub.nxv2f64(, , )