Index: llvm/include/llvm/IR/IntrinsicsAArch64.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsAArch64.td +++ llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -832,6 +832,20 @@ llvm_i32_ty], [IntrNoMem]>; + class AdvSIMD_SVE_FCVT_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyvector_ty], + [IntrNoMem]>; + + class AdvSIMD_SVE_FCVTZS_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMVectorOfBitcastsToInt<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyvector_ty], + [IntrNoMem]>; + class AdvSIMD_SVE_PUNPKHI_Intrinsic : Intrinsic<[LLVMHalfElementsVectorType<0>], [llvm_anyvector_ty], @@ -844,6 +858,13 @@ LLVMVectorOfBitcastsToInt<0>], [IntrNoMem]>; + class AdvSIMD_SVE_SCVTF_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyvector_ty], + [IntrNoMem]>; + class AdvSIMD_SVE_TSMUL_Intrinsic : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, @@ -975,10 +996,54 @@ def int_aarch64_sve_ftssel_x : AdvSIMD_SVE_TSMUL_Intrinsic; // +// Floating-point conversions +// + +def int_aarch64_sve_fcvt : AdvSIMD_SVE_FCVT_Intrinsic; +def int_aarch64_sve_fcvtzs : AdvSIMD_SVE_FCVTZS_Intrinsic; +def int_aarch64_sve_fcvtzu : AdvSIMD_SVE_FCVTZS_Intrinsic; +def int_aarch64_sve_scvtf : AdvSIMD_SVE_SCVTF_Intrinsic; +def int_aarch64_sve_ucvtf : AdvSIMD_SVE_SCVTF_Intrinsic; + +// // Floating-point comparisons // -def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvtzs_i32f64 : Builtin_SVCVT<"svcvt_s32_f64_m", llvm_nxv4i32_ty, llvm_nxv2f64_ty>; +def int_aarch64_sve_fcvtzs_i64f16 : Builtin_SVCVT<"svcvt_s64_f16_m", llvm_nxv2i64_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvtzs_i64f32 : Builtin_SVCVT<"svcvt_s64_f32_m", llvm_nxv2i64_ty, llvm_nxv4f32_ty>; + +def int_aarch64_sve_fcvtzu_i32f16 : Builtin_SVCVT<"svcvt_u32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvtzu_i32f64 : Builtin_SVCVT<"svcvt_u32_f64_m", llvm_nxv4i32_ty, llvm_nxv2f64_ty>; +def int_aarch64_sve_fcvtzu_i64f16 : Builtin_SVCVT<"svcvt_u64_f16_m", llvm_nxv2i64_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvtzu_i64f32 : Builtin_SVCVT<"svcvt_u64_f32_m", llvm_nxv2i64_ty, llvm_nxv4f32_ty>; + +def int_aarch64_sve_fcvt_f16f32 : Builtin_SVCVT<"svcvt_f16_f32_m", llvm_nxv8f16_ty, llvm_nxv4f32_ty>; +def int_aarch64_sve_fcvt_f16f64 : Builtin_SVCVT<"svcvt_f16_f64_m", llvm_nxv8f16_ty, llvm_nxv2f64_ty>; +def int_aarch64_sve_fcvt_f32f64 : Builtin_SVCVT<"svcvt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2f64_ty>; + +def int_aarch64_sve_fcvt_f32f16 : Builtin_SVCVT<"svcvt_f32_f16_m", llvm_nxv4f32_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvt_f64f16 : Builtin_SVCVT<"svcvt_f64_f16_m", llvm_nxv2f64_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvt_f64f32 : Builtin_SVCVT<"svcvt_f64_f32_m", llvm_nxv2f64_ty, llvm_nxv4f32_ty>; + +def int_aarch64_sve_fcvtlt_f32f16 : Builtin_SVCVT<"svcvtlt_f32_f16_m", llvm_nxv4f32_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvtlt_f64f32 : Builtin_SVCVT<"svcvtlt_f64_f32_m", llvm_nxv2f64_ty, llvm_nxv4f32_ty>; +def int_aarch64_sve_fcvtnt_f16f32 : Builtin_SVCVT<"svcvtnt_f16_f32_m", llvm_nxv8f16_ty, llvm_nxv4f32_ty>; +def int_aarch64_sve_fcvtnt_f32f64 : Builtin_SVCVT<"svcvtnt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2f64_ty>; + +def int_aarch64_sve_fcvtx_f32f64 : Builtin_SVCVT<"svcvtx_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2f64_ty>; +def int_aarch64_sve_fcvtxnt_f32f64 : Builtin_SVCVT<"svcvtxnt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2f64_ty>; + +def int_aarch64_sve_scvtf_f16i32 : Builtin_SVCVT<"svcvt_f16_s32_m", llvm_nxv8f16_ty, llvm_nxv4i32_ty>; +def int_aarch64_sve_scvtf_f16i64 : Builtin_SVCVT<"svcvt_f16_s64_m", llvm_nxv8f16_ty, llvm_nxv2i64_ty>; +def int_aarch64_sve_scvtf_f32i64 : Builtin_SVCVT<"svcvt_f32_s64_m", llvm_nxv4f32_ty, llvm_nxv2i64_ty>; +def int_aarch64_sve_scvtf_f64i32 : Builtin_SVCVT<"svcvt_f64_s32_m", llvm_nxv2f64_ty, llvm_nxv4i32_ty>; + +def int_aarch64_sve_ucvtf_f16i32 : Builtin_SVCVT<"svcvt_f16_u32_m", llvm_nxv8f16_ty, llvm_nxv4i32_ty>; +def int_aarch64_sve_ucvtf_f16i64 : Builtin_SVCVT<"svcvt_f16_u64_m", llvm_nxv8f16_ty, llvm_nxv2i64_ty>; +def int_aarch64_sve_ucvtf_f32i64 : Builtin_SVCVT<"svcvt_f32_u64_m", llvm_nxv4f32_ty, llvm_nxv2i64_ty>; +def int_aarch64_sve_ucvtf_f64i32 : Builtin_SVCVT<"svcvt_f64_u32_m", llvm_nxv2f64_ty, llvm_nxv4i32_ty>; // // Floating-point arithmetic Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -891,40 +891,40 @@ defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr">; defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl">; - def FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, ElementSizeS>; - def FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, ElementSizeS>; - def SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, ElementSizeH>; - def SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, ElementSizeS>; - def UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, ElementSizeS>; - def UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, ElementSizeH>; - def FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, ElementSizeH>; - def FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, ElementSizeS>; - def FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, ElementSizeH>; - def FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, ElementSizeS>; - def FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, ElementSizeD>; - def FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, ElementSizeD>; - def FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, ElementSizeD>; - def FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, ElementSizeD>; - def SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, ElementSizeD>; - def UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, ElementSizeD>; - def UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, ElementSizeS>; - def SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, ElementSizeD>; - def SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, ElementSizeS>; - def SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, ElementSizeD>; - def UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, ElementSizeD>; - def UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, ElementSizeD>; - def SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, ElementSizeD>; - def UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, ElementSizeD>; - def FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, ElementSizeD>; - def FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, ElementSizeD>; - def FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, ElementSizeD>; - def FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, ElementSizeS>; - def FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, ElementSizeD>; - def FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, ElementSizeS>; - def FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, ElementSizeD>; - def FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, ElementSizeD>; - def FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, ElementSizeD>; - def FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, ElementSizeD>; + defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, nxv8f16, nxv16i1, nxv4f32, ElementSizeS>; + defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, nxv4f32, nxv16i1, nxv8f16, ElementSizeS>; + defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, int_aarch64_sve_scvtf, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; + defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, int_aarch64_sve_scvtf, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; + defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, int_aarch64_sve_ucvtf, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; + defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, int_aarch64_sve_ucvtf, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; + defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, int_aarch64_sve_fcvtzs, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; + defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, int_aarch64_sve_fcvtzs, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; + defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, int_aarch64_sve_fcvtzu, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; + defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, int_aarch64_sve_fcvtzu, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; + defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, nxv8f16, nxv16i1, nxv2f64, ElementSizeD>; + defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, nxv2f64, nxv16i1, nxv8f16, ElementSizeD>; + defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, nxv4f32, nxv16i1, nxv2f64, ElementSizeD>; + defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, nxv2f64, nxv16i1, nxv4f32, ElementSizeD>; + defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, nxv2f64, nxv16i1, nxv4i32, ElementSizeD>; + defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, nxv2f64, nxv16i1, nxv4i32, ElementSizeD>; + defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, nxv8f16, nxv16i1, nxv4i32, ElementSizeS>; + defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, nxv4f32, nxv16i1, nxv2i64, ElementSizeD>; + defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, nxv8f16, nxv16i1, nxv4i32, ElementSizeS>; + defm SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, nxv8f16, nxv16i1, nxv2i64, ElementSizeD>; + defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, nxv4f32, nxv16i1, nxv2i64, ElementSizeD>; + defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, nxv8f16, nxv16i1, nxv2i64, ElementSizeD>; + defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, int_aarch64_sve_scvtf, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; + defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, int_aarch64_sve_ucvtf, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; + defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, nxv4i32, nxv16i1, nxv2f64, ElementSizeD>; + defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, nxv4i32, nxv16i1, nxv2f64, ElementSizeD>; + defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, nxv2i64, nxv16i1, nxv4f32, ElementSizeD>; + defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, nxv4i32, nxv16i1, nxv8f16, ElementSizeS>; + defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, nxv2i64, nxv16i1, nxv8f16, ElementSizeD>; + defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, nxv4i32, nxv16i1, nxv8f16, ElementSizeS>; + defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, nxv2i64, nxv16i1, nxv8f16, ElementSizeD>; + defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, nxv2i64, nxv16i1, nxv4f32, ElementSizeD>; + defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, int_aarch64_sve_fcvtzs, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, int_aarch64_sve_fcvtzu, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn">; defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp">; @@ -1424,10 +1424,10 @@ defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">; // SVE2 floating-point convert precision - defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtxnt">; - defm FCVTNT_ZPmZ : sve2_fp_convert_down_narrow<"fcvtnt">; - defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt">; - def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>; + defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding_top<"fcvtxnt", "int_aarch64_sve_fcvtxnt">; + defm FCVTX_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtx", "int_aarch64_sve_fcvtx">; + defm FCVTNT_ZPmZ : sve2_fp_convert_down_narrow<"fcvtnt", "int_aarch64_sve_fcvtnt">; + defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt", "int_aarch64_sve_fcvtlt">; // SVE2 floating-point pairwise operations defm FADDP_ZPmZZ : sve2_fp_pairwise_pred<0b000, "faddp">; Index: llvm/lib/Target/AArch64/SVEInstrFormats.td =================================================================== --- llvm/lib/Target/AArch64/SVEInstrFormats.td +++ llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -1633,18 +1633,26 @@ let Constraints = "$Zd = $_Zd"; } -multiclass sve2_fp_convert_down_narrow { +multiclass sve2_fp_convert_down_narrow { def _StoH : sve2_fp_convert_precision<0b1000, asm, ZPR16, ZPR32>; def _DtoS : sve2_fp_convert_precision<0b1110, asm, ZPR32, ZPR64>; + + def : SVE_3_Op_Pat(op # _f16f32), nxv8f16, nxv16i1, nxv4f32, !cast(NAME # _StoH)>; + def : SVE_3_Op_Pat(op # _f32f64), nxv4f32, nxv16i1, nxv2f64, !cast(NAME # _DtoS)>; } -multiclass sve2_fp_convert_up_long { +multiclass sve2_fp_convert_up_long { def _HtoS : sve2_fp_convert_precision<0b1001, asm, ZPR32, ZPR16>; def _StoD : sve2_fp_convert_precision<0b1111, asm, ZPR64, ZPR32>; + + def : SVE_3_Op_Pat(op # _f32f16), nxv4f32, nxv16i1, nxv8f16, !cast(NAME # _HtoS)>; + def : SVE_3_Op_Pat(op # _f64f32), nxv2f64, nxv16i1, nxv4f32, !cast(NAME # _StoD)>; } -multiclass sve2_fp_convert_down_odd_rounding { +multiclass sve2_fp_convert_down_odd_rounding_top { def _DtoS : sve2_fp_convert_precision<0b0010, asm, ZPR32, ZPR64>; + + def : SVE_3_Op_Pat(op # _f32f64), nxv4f32, nxv16i1, nxv2f64, !cast(NAME # _DtoS)>; } //===----------------------------------------------------------------------===// @@ -1830,6 +1838,16 @@ let ElementSize = size; } +multiclass sve_fp_2op_p_zd opc, string asm, + RegisterOperand i_zprtype, + RegisterOperand o_zprtype, + SDPatternOperator op, ValueType vt1, + ValueType vt2, ValueType vt3, ElementSizeEnum Sz> { + def NAME : sve_fp_2op_p_zd; + + def : SVE_3_Op_Pat(NAME)>; +} + multiclass sve_fp_2op_p_zd_HSD opc, string asm> { def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>; def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>; @@ -1842,6 +1860,11 @@ def _D : sve_fp_2op_p_zd<0b0011110, asm, ZPR64, ZPR64, ElementSizeD>; } +multiclass sve2_fp_convert_down_odd_rounding { + def _DtoS : sve_fp_2op_p_zd<0b0001010, asm, ZPR64, ZPR32, ElementSizeD>; + def : SVE_3_Op_Pat(op # _f32f64), nxv4f32, nxv16i1, nxv2f64, !cast(NAME # _DtoS)>; +} + //===----------------------------------------------------------------------===// // SVE Floating Point Unary Operations - Unpredicated Group //===----------------------------------------------------------------------===// Index: llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll @@ -0,0 +1,400 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; FCVT +; + +define @fcvt_f16_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvt_f16_f32: +; CHECK: fcvt z0.h, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvt.f16f32( %a, + %pg, + %b) + ret %out +} + +define @fcvt_f16_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvt_f16_f64: +; CHECK: fcvt z0.h, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvt.f16f64( %a, + %pg, + %b) + ret %out +} + +define @fcvt_f32_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvt_f32_f16: +; CHECK: fcvt z0.s, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvt.f32f16( %a, + %pg, + %b) + ret %out +} + +define @fcvt_f32_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvt_f32_f64: +; CHECK: fcvt z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvt.f32f64( %a, + %pg, + %b) + ret %out +} + +define @fcvt_f64_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvt_f64_f16: +; CHECK: fcvt z0.d, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvt.f64f16( %a, + %pg, + %b) + ret %out +} + +define @fcvt_f64_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvt_f64_f32: +; CHECK: fcvt z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvt.f64f32( %a, + %pg, + %b) + ret %out +} + +; +; FCVTZS +; + +define @fcvtzs_i16_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvtzs_i16_f16: +; CHECK: fcvtzs z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16( %a, + %pg, + %b) + ret %out +} + +define @fcvtzs_i32_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvtzs_i32_f32: +; CHECK: fcvtzs z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32( %a, + %pg, + %b) + ret %out +} + +define @fcvtzs_i64_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvtzs_i64_f64: +; CHECK: fcvtzs z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64( %a, + %pg, + %b) + ret %out +} + +define @fcvtzs_i32_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvtzs_i32_f16: +; CHECK: fcvtzs z0.s, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzs.i32f16( %a, + %pg, + %b) + ret %out +} + +define @fcvtzs_i32_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvtzs_i32_f64: +; CHECK: fcvtzs z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzs.i32f64( %a, + %pg, + %b) + ret %out +} + +define @fcvtzs_i64_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvtzs_i64_f16: +; CHECK: fcvtzs z0.d, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzs.i64f16( %a, + %pg, + %b) + ret %out +} + +define @fcvtzs_i64_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvtzs_i64_f32: +; CHECK: fcvtzs z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzs.i64f32( %a, + %pg, + %b) + ret %out +} + +; +; FCVTZU +; + +define @fcvtzu_i16_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvtzu_i16_f16: +; CHECK: fcvtzu z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16( %a, + %pg, + %b) + ret %out +} + +define @fcvtzu_i32_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvtzu_i32_f32: +; CHECK: fcvtzu z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32( %a, + %pg, + %b) + ret %out +} + +define @fcvtzu_i64_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvtzu_i64_f64: +; CHECK: fcvtzu z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64( %a, + %pg, + %b) + ret %out +} + +define @fcvtzu_i32_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvtzu_i32_f16: +; CHECK: fcvtzu z0.s, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzu.i32f16( %a, + %pg, + %b) + ret %out +} + +define @fcvtzu_i32_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvtzu_i32_f64: +; CHECK: fcvtzu z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzu.i32f64( %a, + %pg, + %b) + ret %out +} + +define @fcvtzu_i64_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvtzu_i64_f16: +; CHECK: fcvtzu z0.d, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzu.i64f16( %a, + %pg, + %b) + ret %out +} + +define @fcvtzu_i64_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvtzu_i64_f32: +; CHECK: fcvtzu z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzu.i64f32( %a, + %pg, + %b) + ret %out +} + +; +; SCVTF +; + +define @scvtf_f16_i16( %a, %pg, %b) { +; CHECK-LABEL: scvtf_f16_i16: +; CHECK: scvtf z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16( %a, + %pg, + %b) + ret %out +} + +define @scvtf_f32_i32( %a, %pg, %b) { +; CHECK-LABEL: scvtf_f32_i32: +; CHECK: scvtf z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32( %a, + %pg, + %b) + ret %out +} + +define @scvtf_f64_i64( %a, %pg, %b) { +; CHECK-LABEL: scvtf_f64_i64: +; CHECK: scvtf z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64( %a, + %pg, + %b) + ret %out +} + +define @scvtf_f16_i32( %a, %pg, %b) { +; CHECK-LABEL: scvtf_f16_i32: +; CHECK: scvtf z0.h, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.scvtf.f16i32( %a, + %pg, + %b) + ret %out +} + +define @scvtf_f16_i64( %a, %pg, %b) { +; CHECK-LABEL: scvtf_f16_i64: +; CHECK: scvtf z0.h, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.scvtf.f16i64( %a, + %pg, + %b) + ret %out +} + +define @scvtf_f32_i64( %a, %pg, %b) { +; CHECK-LABEL: scvtf_f32_i64: +; CHECK: scvtf z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.scvtf.f32i64( %a, + %pg, + %b) + ret %out +} + +define @scvtf_f64_i32( %a, %pg, %b) { +; CHECK-LABEL: scvtf_f64_i32: +; CHECK: scvtf z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.scvtf.f64i32( %a, + %pg, + %b) + ret %out +} + +; +; UCVTF +; + +define @ucvtf_f16_i16( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_f16_i16: +; CHECK: ucvtf z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16( %a, + %pg, + %b) + ret %out +} + +define @ucvtf_f32_i32( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_f32_i32: +; CHECK: ucvtf z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32( %a, + %pg, + %b) + ret %out +} + +define @ucvtf_f64_i64( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_f64_i64: +; CHECK: ucvtf z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64( %a, + %pg, + %b) + ret %out +} + +define @ucvtf_f16_i32( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_f16_i32: +; CHECK: ucvtf z0.h, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ucvtf.f16i32( %a, + %pg, + %b) + ret %out +} + +define @ucvtf_f16_i64( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_f16_i64: +; CHECK: ucvtf z0.h, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ucvtf.f16i64( %a, + %pg, + %b) + ret %out +} + +define @ucvtf_f32_i64( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_f32_i64: +; CHECK: ucvtf z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ucvtf.f32i64( %a, + %pg, + %b) + ret %out +} + +define @ucvtf_f64_i32( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_f64_i32: +; CHECK: ucvtf z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ucvtf.f64i32( %a, + %pg, + %b) + ret %out +} + +declare @llvm.aarch64.sve.fcvt.f16f32(, , ) +declare @llvm.aarch64.sve.fcvt.f16f64(, , ) +declare @llvm.aarch64.sve.fcvt.f32f16(, , ) +declare @llvm.aarch64.sve.fcvt.f32f64(, , ) +declare @llvm.aarch64.sve.fcvt.f64f16(, , ) +declare @llvm.aarch64.sve.fcvt.f64f32(, , ) + +declare @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(, , ) +declare @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(, , ) +declare @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64(, , ) +declare @llvm.aarch64.sve.fcvtzs.i32f16(, , ) +declare @llvm.aarch64.sve.fcvtzs.i32f64(, , ) +declare @llvm.aarch64.sve.fcvtzs.i64f16(, , ) +declare @llvm.aarch64.sve.fcvtzs.i64f32(, , ) + +declare @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(, , ) +declare @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(, , ) +declare @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(, , ) +declare @llvm.aarch64.sve.fcvtzu.i32f16(, , ) +declare @llvm.aarch64.sve.fcvtzu.i32f64(, , ) +declare @llvm.aarch64.sve.fcvtzu.i64f16(, , ) +declare @llvm.aarch64.sve.fcvtzu.i64f32(, , ) + +declare @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(, , ) +declare @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(, , ) +declare @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(, , ) +declare @llvm.aarch64.sve.scvtf.f16i32(, , ) +declare @llvm.aarch64.sve.scvtf.f16i64(, , ) +declare @llvm.aarch64.sve.scvtf.f32i64(, , ) +declare @llvm.aarch64.sve.scvtf.f64i32(, , ) + +declare @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(, , ) +declare @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(, , ) +declare @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(, , ) +declare @llvm.aarch64.sve.ucvtf.f16i32(, , ) +declare @llvm.aarch64.sve.ucvtf.f16i64(, , ) +declare @llvm.aarch64.sve.ucvtf.f32i64(, , ) +declare @llvm.aarch64.sve.ucvtf.f64i32(, , ) Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll @@ -0,0 +1,84 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s + +; +; FCVTLT +; + +define @fcvtlt_f32_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvtlt_f32_f16: +; CHECK: fcvtlt z0.s, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtlt.f32f16( %a, + %pg, + %b) + ret %out +} + +define @fcvtlt_f64_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvtlt_f64_f32: +; CHECK: fcvtlt z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtlt.f64f32( %a, + %pg, + %b) + ret %out +} + +; +; FCVTNT +; + +define @fcvtnt_f16_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvtnt_f16_f32: +; CHECK: fcvtnt z0.h, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtnt.f16f32( %a, + %pg, + %b) + ret %out +} + +define @fcvtnt_f32_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvtnt_f32_f64: +; CHECK: fcvtnt z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtnt.f32f64( %a, + %pg, + %b) + ret %out +} + +; +; FCVTX +; + +define @fcvtx_f32_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvtx_f32_f64: +; CHECK: fcvtx z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtx.f32f64( %a, + %pg, + %b) + ret %out +} + +; +; FCVTXNT +; + +define @fcvtxnt_f32_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvtxnt_f32_f64: +; CHECK: fcvtxnt z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtxnt.f32f64( %a, + %pg, + %b) + ret %out +} + +declare @llvm.aarch64.sve.fcvtlt.f32f16(, , ) +declare @llvm.aarch64.sve.fcvtlt.f64f32(, , ) +declare @llvm.aarch64.sve.fcvtnt.f16f32(, , ) +declare @llvm.aarch64.sve.fcvtnt.f32f64(, , ) +declare @llvm.aarch64.sve.fcvtx.f32f64(, , ) +declare @llvm.aarch64.sve.fcvtxnt.f32f64(, , )