Index: llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -559,7 +559,7 @@
   // not Legal, check to see if we can use FP_TO_SINT instead.  (If both UINT
   // and SINT conversions are Custom, there is no way to tell which is
   // preferable. We choose SINT because that's the right thing on PPC.)
-  if (N->getOpcode() == ISD::FP_TO_UINT &&
+  if (!NVT.isScalableVector() && N->getOpcode() == ISD::FP_TO_UINT &&
       !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
       TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
     NewOpc = ISD::FP_TO_SINT;
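
Without the new guard, promoting the illegal integer result of a scalable-vector FP_TO_UINT would rewrite the node to FP_TO_SINT, because for SVE types both conversions are Custom rather than Legal. The guard keeps the unsigned form on scalable vectors so it reaches the predicated custom lowering below and selects fcvtzu directly. A minimal IR sketch of the kind of input this affects (illustrative only; equivalent coverage is in the new sve-fcvt.ll tests below):

  define <vscale x 4 x i16> @example(<vscale x 4 x half> %a) {
    ; The nxv4i16 result is promoted to nxv4i32; with the guard the node
    ; stays FP_TO_UINT and selects "fcvtzu z0.s, p0/m, z0.h".
    %res = fptoui <vscale x 4 x half> %a to <vscale x 4 x i16>
    ret <vscale x 4 x i16> %res
  }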
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -80,6 +80,8 @@
   FMAXNM_PRED,
   FMINNM_PRED,
   FMUL_PRED,
+  FP_TO_UINT_PRED,
+  FP_TO_SINT_PRED,
   FSUB_PRED,
   MUL_PRED,
   SDIV_PRED,
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -944,6 +944,8 @@
     for (MVT VT : MVT::integer_scalable_vector_valuetypes()) {
       if (isTypeLegal(VT)) {
         setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+        setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+        setOperationAction(ISD::FP_TO_SINT, VT, Custom);
         setOperationAction(ISD::MUL, VT, Custom);
         setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
         setOperationAction(ISD::SELECT, VT, Custom);
@@ -1602,6 +1604,8 @@
     MAKE_CASE(AArch64ISD::FMINNM_PRED)
     MAKE_CASE(AArch64ISD::FMINNMV_PRED)
     MAKE_CASE(AArch64ISD::FMUL_PRED)
+    MAKE_CASE(AArch64ISD::FP_TO_UINT_PRED)
+    MAKE_CASE(AArch64ISD::FP_TO_SINT_PRED)
     MAKE_CASE(AArch64ISD::FSUB_PRED)
     MAKE_CASE(AArch64ISD::NOT)
     MAKE_CASE(AArch64ISD::BIT)
@@ -2866,6 +2870,14 @@
   // in the cost tables.
   EVT InVT = Op.getOperand(0).getValueType();
   EVT VT = Op.getValueType();
+
+  if (VT.isScalableVector()) {
+    unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
+                          ? AArch64ISD::FP_TO_UINT_PRED
+                          : AArch64ISD::FP_TO_SINT_PRED;
+    return LowerToPredicatedOp(Op, DAG, Opcode);
+  }
+
   unsigned NumElts = InVT.getVectorNumElements();

   // f16 conversions are promoted to f32 when full fp16 is not supported.
@@ -12309,6 +12321,19 @@
                      Zero);
 }

+static SDValue combineSVEConversionFP(SDNode *N, unsigned Opc,
+                                      SelectionDAG &DAG) {
+  SDLoc DL(N);
+
+  SDValue Pred = N->getOperand(2);
+  SDValue VecToConvert = N->getOperand(3);
+  EVT ConvertVT = N->getOperand(1).getValueType();
+
+  return DAG.getNode(Opc, DL, ConvertVT,
+                     DAG.getUNDEF(ConvertVT),
+                     Pred, VecToConvert);
+}
+
 static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc,
                                             SelectionDAG &DAG) {
   SDLoc DL(N);
@@ -12490,6 +12515,18 @@
     return combineSVEReductionFP(N, AArch64ISD::FMINNMV_PRED, DAG);
   case Intrinsic::aarch64_sve_fminv:
     return combineSVEReductionFP(N, AArch64ISD::FMINV_PRED, DAG);
+  case Intrinsic::aarch64_sve_fcvtzu:
+  case Intrinsic::aarch64_sve_fcvtzu_i32f64:
+  case Intrinsic::aarch64_sve_fcvtzu_i32f16:
+  case Intrinsic::aarch64_sve_fcvtzu_i64f16:
+  case Intrinsic::aarch64_sve_fcvtzu_i64f32:
+    return combineSVEConversionFP(N, AArch64ISD::FP_TO_UINT_PRED, DAG);
+  case Intrinsic::aarch64_sve_fcvtzs:
+  case Intrinsic::aarch64_sve_fcvtzs_i32f64:
+  case Intrinsic::aarch64_sve_fcvtzs_i64f32:
+  case Intrinsic::aarch64_sve_fcvtzs_i32f16:
+  case Intrinsic::aarch64_sve_fcvtzs_i64f16:
+    return combineSVEConversionFP(N, AArch64ISD::FP_TO_SINT_PRED, DAG);
   case Intrinsic::aarch64_sve_sel:
     return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
@@ -15736,7 +15773,13 @@
   assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");

-  SmallVector<SDValue, 4> Operands = {Pg};
+  SmallVector<SDValue, 4> Operands;
+
+  if (NewOp == AArch64ISD::FP_TO_UINT_PRED ||
+      NewOp == AArch64ISD::FP_TO_SINT_PRED)
+    Operands.push_back(DAG.getUNDEF(Op.getValueType()));
+
+  Operands.push_back(Pg);
   for (const SDValue &V : Op->op_values()) {
     assert((isa<CondCodeSDNode>(V) || V.getValueType().isScalableVector()) &&
            "Only scalable vectors are supported!");
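
For reference, the operand order the new combine relies on: for the SVE conversion intrinsics, operand 1 is the passthru vector, operand 2 the governing predicate, and operand 3 the value to convert; the combine builds the predicated node with an undef passthru in place of operand 1. A hypothetical example of IR that now takes this path (signature assumed to match the aarch64_sve_fcvtzs_i32f64 intrinsic):

  declare <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32>, <vscale x 2 x i1>, <vscale x 2 x double>)

  define <vscale x 4 x i32> @example(<vscale x 4 x i32> %pt, <vscale x 2 x i1> %pg, <vscale x 2 x double> %v) {
    ; Lowered to FP_TO_SINT_PRED(undef, %pg, %v); note the inactive lanes
    ; come from undef rather than %pt after this combine.
    %r = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> %pt,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x double> %v)
    ret <vscale x 4 x i32> %r
  }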
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -167,6 +167,14 @@
   SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3>
 ]>;

+def SDT_AArch64FCVT : SDTypeProfile<1, 3, [
+  SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
+  SDTCVecEltisVT<2,i1>
+]>;
+
+def AArch64fcvtzu_p : SDNode<"AArch64ISD::FP_TO_UINT_PRED", SDT_AArch64FCVT>;
+def AArch64fcvtzs_p : SDNode<"AArch64ISD::FP_TO_SINT_PRED", SDT_AArch64FCVT>;
+
 def SDT_AArch64FMA : SDTypeProfile<1, 4, [
   SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCisVec<4>,
   SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3>, SDTCisSameAs<3,4>
 ]>;
@@ -1387,16 +1395,14 @@
   defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr", int_aarch64_sve_lsr_wide>;
   defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>;

+  defm FCVTZS_ZPmZ : sve_fp_2op_p_zd_signed  <"fcvtzs", AArch64fcvtzs_p>;
+  defm FCVTZU_ZPmZ : sve_fp_2op_p_zd_unsigned<"fcvtzu", AArch64fcvtzu_p>;
   defm FCVT_ZPmZ_StoH  : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, nxv8f16, nxv4i1, nxv4f32, ElementSizeS>;
   defm FCVT_ZPmZ_HtoS  : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, nxv4f32, nxv4i1, nxv8f16, ElementSizeS>;
   defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, int_aarch64_sve_scvtf, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>;
   defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, int_aarch64_sve_scvtf, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>;
   defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, int_aarch64_sve_ucvtf, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>;
   defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, int_aarch64_sve_ucvtf, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>;
-  defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, int_aarch64_sve_fcvtzs, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>;
-  defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, int_aarch64_sve_fcvtzs, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>;
-  defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, int_aarch64_sve_fcvtzu, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>;
-  defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, int_aarch64_sve_fcvtzu, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>;
   defm FCVT_ZPmZ_DtoH  : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, nxv8f16, nxv2i1, nxv2f64, ElementSizeD>;
   defm FCVT_ZPmZ_HtoD  : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, nxv2f64, nxv2i1, nxv8f16, ElementSizeD>;
   defm FCVT_ZPmZ_DtoS  : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, nxv4f32, nxv2i1, nxv2f64, ElementSizeD>;
@@ -1411,16 +1417,6 @@
   defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, nxv8f16, nxv2i1, nxv2i64, ElementSizeD>;
   defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, int_aarch64_sve_scvtf, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
   defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, int_aarch64_sve_ucvtf, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
-  defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
-  defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
-  defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, nxv2i64, nxv2i1, nxv4f32, ElementSizeD>;
-  defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, nxv4i32, nxv4i1, nxv8f16, ElementSizeS>;
-  defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, nxv2i64, nxv2i1, nxv8f16, ElementSizeD>;
-  defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, nxv4i32, nxv4i1, nxv8f16, ElementSizeS>;
-  defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, nxv2i64, nxv2i1, nxv8f16, ElementSizeD>;
-  defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, nxv2i64, nxv2i1, nxv4f32, ElementSizeD>;
-  defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, int_aarch64_sve_fcvtzs, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
-  defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, int_aarch64_sve_fcvtzu, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
   defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", null_frag, AArch64frintn_mt>;
   defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", null_frag, AArch64frintp_mt>;
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2286,6 +2286,48 @@
   def : SVE_3_Op_Pat<vt1, op, vt2, vt3, !cast<Instruction>(NAME)>;
 }

+multiclass sve_fp_2op_p_zd_signed<string asm, SDPatternOperator op> {
+  def _HtoH : sve_fp_2op_p_zd<0b0111010, asm, ZPR16, ZPR16, ElementSizeH>;
+  def _HtoS : sve_fp_2op_p_zd<0b0111100, asm, ZPR16, ZPR32, ElementSizeS>;
+  def _HtoD : sve_fp_2op_p_zd<0b0111110, asm, ZPR16, ZPR64, ElementSizeD>;
+  def _StoS : sve_fp_2op_p_zd<0b1011100, asm, ZPR32, ZPR32, ElementSizeS>;
+  def _StoD : sve_fp_2op_p_zd<0b1111100, asm, ZPR32, ZPR64, ElementSizeD>;
+  def _DtoS : sve_fp_2op_p_zd<0b1111000, asm, ZPR64, ZPR32, ElementSizeS>;
+  def _DtoD : sve_fp_2op_p_zd<0b1111110, asm, ZPR64, ZPR64, ElementSizeD>;
+
+  def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _HtoH)>;
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _HtoS)>;
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv8f16, !cast<Instruction>(NAME # _HtoD)>;
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _HtoD)>;
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoS)>;
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _StoD)>;
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoD)>;
+}
+
+multiclass sve_fp_2op_p_zd_unsigned<string asm, SDPatternOperator op> {
+  def _HtoH : sve_fp_2op_p_zd<0b0111011, asm, ZPR16, ZPR16, ElementSizeH>;
+  def _HtoS : sve_fp_2op_p_zd<0b0111101, asm, ZPR16, ZPR32, ElementSizeS>;
+  def _HtoD : sve_fp_2op_p_zd<0b0111111, asm, ZPR16, ZPR64, ElementSizeD>;
+  def _StoS : sve_fp_2op_p_zd<0b1011101, asm, ZPR32, ZPR32, ElementSizeS>;
+  def _StoD : sve_fp_2op_p_zd<0b1111101, asm, ZPR32, ZPR64, ElementSizeD>;
+  def _DtoS : sve_fp_2op_p_zd<0b1111001, asm, ZPR64, ZPR32, ElementSizeS>;
+  def _DtoD : sve_fp_2op_p_zd<0b1111111, asm, ZPR64, ZPR64, ElementSizeD>;
+
+  def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _HtoH)>;
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _HtoS)>;
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv8f16, !cast<Instruction>(NAME # _HtoD)>;
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _HtoD)>;
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoS)>;
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _StoD)>;
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoD)>;
+}
+
 multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op_merge,
                                SDPatternOperator op_pt = null_frag> {
   def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>;
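
Each widening or narrowing conversion carries two selection patterns: one with the packed source type the existing intrinsics use (e.g. nxv8f16 under an nxv4i1 predicate), and one with the unpacked source type (nxv4f16, nxv2f16, nxv2f32) that fptosi/fptoui legalization produces. A sketch of an unpacked case the _StoD pattern is meant to catch (same shape as the fcvtzs_d_nxv2f32 test below):

  define <vscale x 2 x i64> @example(<vscale x 2 x float> %a) {
    ; Unpacked single-precision input: selects "fcvtzs z0.d, p0/m, z0.s".
    %res = fptosi <vscale x 2 x float> %a to <vscale x 2 x i64>
    ret <vscale x 2 x i64> %res
  }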
Index: llvm/test/CodeGen/AArch64/sve-fcvt.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-fcvt.ll
@@ -0,0 +1,230 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; FP_TO_SINT
+;
+
+define <vscale x 2 x i16> @fcvtzs_h_nxv2f16(<vscale x 2 x half> %a) {
+; CHECK-LABEL: fcvtzs_h_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 2 x half> %a to <vscale x 2 x i16>
+  ret <vscale x 2 x i16> %res
+}
+
+define <vscale x 4 x i16> @fcvtzs_h_nxv4f16(<vscale x 4 x half> %a) {
+; CHECK-LABEL: fcvtzs_h_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 4 x half> %a to <vscale x 4 x i16>
+  ret <vscale x 4 x i16> %res
+}
+
+define <vscale x 8 x i16> @fcvtzs_h_nxv8f16(<vscale x 8 x half> %a) {
+; CHECK-LABEL: fcvtzs_h_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fcvtzs z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 8 x half> %a to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 2 x i32> @fcvtzs_s_nxv2f16(<vscale x 2 x half> %a) {
+; CHECK-LABEL: fcvtzs_s_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 2 x half> %a to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %res
+}
+
+define <vscale x 2 x i32> @fcvtzs_s_nxv2f32(<vscale x 2 x float> %a) {
+; CHECK-LABEL: fcvtzs_s_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 2 x float> %a to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %res
+}
+
+define <vscale x 2 x i32> @fcvtzs_s_nxv2f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: fcvtzs_s_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 2 x double> %a to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %res
+}
+
+define <vscale x 4 x i32> @fcvtzs_s_nxv4f16(<vscale x 4 x half> %a) {
+; CHECK-LABEL: fcvtzs_s_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 4 x half> %a to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 4 x i32> @fcvtzs_s_nxv4f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: fcvtzs_s_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 4 x float> %a to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @fcvtzs_d_nxv2f16(<vscale x 2 x half> %a) {
+; CHECK-LABEL: fcvtzs_d_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 2 x half> %a to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 2 x i64> @fcvtzs_d_nxv2f32(<vscale x 2 x float> %a) {
+; CHECK-LABEL: fcvtzs_d_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 2 x float> %a to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 2 x i64> @fcvtzs_d_nxv2f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: fcvtzs_d_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 2 x double> %a to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %res
+}
+
+;
+; FP_TO_UINT
+;
+
+define <vscale x 2 x i16> @fcvtzu_h_nxv2f16(<vscale x 2 x half> %a) {
+; CHECK-LABEL: fcvtzu_h_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 2 x half> %a to <vscale x 2 x i16>
+  ret <vscale x 2 x i16> %res
+}
+
+define <vscale x 4 x i16> @fcvtzu_h_nxv4f16(<vscale x 4 x half> %a) {
+; CHECK-LABEL: fcvtzu_h_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 4 x half> %a to <vscale x 4 x i16>
+  ret <vscale x 4 x i16> %res
+}
+
+define <vscale x 8 x i16> @fcvtzu_h_nxv8f16(<vscale x 8 x half> %a) {
+; CHECK-LABEL: fcvtzu_h_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fcvtzu z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 8 x half> %a to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 2 x i32> @fcvtzu_s_nxv2f16(<vscale x 2 x half> %a) {
+; CHECK-LABEL: fcvtzu_s_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 2 x half> %a to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %res
+}
+
+define <vscale x 2 x i32> @fcvtzu_s_nxv2f32(<vscale x 2 x float> %a) {
+; CHECK-LABEL: fcvtzu_s_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 2 x float> %a to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %res
+}
+
+define <vscale x 2 x i32> @fcvtzu_s_nxv2f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: fcvtzu_s_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 2 x double> %a to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %res
+}
+
+define <vscale x 4 x i32> @fcvtzu_s_nxv4f16(<vscale x 4 x half> %a) {
+; CHECK-LABEL: fcvtzu_s_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 4 x half> %a to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 4 x i32> @fcvtzu_s_nxv4f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: fcvtzu_s_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 4 x float> %a to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @fcvtzu_d_nxv2f16(<vscale x 2 x half> %a) {
+; CHECK-LABEL: fcvtzu_d_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 2 x half> %a to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 2 x i64> @fcvtzu_d_nxv2f32(<vscale x 2 x float> %a) {
+; CHECK-LABEL: fcvtzu_d_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 2 x float> %a to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 2 x i64> @fcvtzu_d_nxv2f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: fcvtzu_d_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 2 x double> %a to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %res
+}
Index: llvm/test/CodeGen/AArch64/sve-split-fcvt.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-split-fcvt.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+; FP_TO_SINT
+
+; Split operand
+define <vscale x 4 x i32> @fcvtzs_s_nxv4f64(<vscale x 4 x double> %a) {
+; CHECK-LABEL: fcvtzs_s_nxv4f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzs z1.d, p0/m, z1.d
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 4 x double> %a to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 8 x i16> @fcvtzs_h_nxv8f64(<vscale x 8 x double> %a) {
+; CHECK-LABEL: fcvtzs_h_nxv8f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzs z3.d, p0/m, z3.d
+; CHECK-NEXT:    fcvtzs z2.d, p0/m, z2.d
+; CHECK-NEXT:    fcvtzs z1.d, p0/m, z1.d
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 8 x double> %a to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %res
+}
+
+; Split result
+define <vscale x 4 x i64> @fcvtzs_d_nxv4f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: fcvtzs_d_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z1.d, z0.s
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uunpkhi z2.d, z0.s
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z1.s
+; CHECK-NEXT:    fcvtzs z1.d, p0/m, z2.s
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 4 x float> %a to <vscale x 4 x i64>
+  ret <vscale x 4 x i64> %res
+}
+
+define <vscale x 16 x i32> @fcvtzs_s_nxv16f16(<vscale x 16 x half> %a) {
+; CHECK-LABEL: fcvtzs_s_nxv16f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z2.s, z0.h
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    uunpkhi z3.s, z0.h
+; CHECK-NEXT:    uunpklo z4.s, z1.h
+; CHECK-NEXT:    uunpkhi z5.s, z1.h
+; CHECK-NEXT:    fcvtzs z0.s, p0/m, z2.h
+; CHECK-NEXT:    fcvtzs z1.s, p0/m, z3.h
+; CHECK-NEXT:    fcvtzs z2.s, p0/m, z4.h
+; CHECK-NEXT:    fcvtzs z3.s, p0/m, z5.h
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 16 x half> %a to <vscale x 16 x i32>
+  ret <vscale x 16 x i32> %res
+}
+
+; FP_TO_UINT
+
+; Split operand
+define <vscale x 4 x i32> @fcvtzu_s_nxv4f64(<vscale x 4 x double> %a) {
+; CHECK-LABEL: fcvtzu_s_nxv4f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzu z1.d, p0/m, z1.d
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 4 x double> %a to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %res
+}
+
+; Split result
+define <vscale x 4 x i64> @fcvtzu_d_nxv4f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: fcvtzu_d_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z1.d, z0.s
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uunpkhi z2.d, z0.s
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z1.s
+; CHECK-NEXT:    fcvtzu z1.d, p0/m, z2.s
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 4 x float> %a to <vscale x 4 x i64>
+  ret <vscale x 4 x i64> %res
+}
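
When a conversion's operand or result is wider than the largest legal scalable type, the legalizer splits the operation in half and, for split operands, recombines the pieces with uzp1, which concatenates the even-indexed elements of its two inputs. Each half of the split cases above is the unpacked single-register primitive already tested in sve-fcvt.ll; for instance (same shape as the fcvtzs_s_nxv2f64 test):

  define <vscale x 2 x i32> @half(<vscale x 2 x double> %a) {
    ; Unpacked nxv2i32 result: one 32-bit value per 64-bit lane, so the
    ; "uzp1 z0.s, z0.s, z1.s" in the split cases packs the two halves by
    ; gathering the even-indexed .s elements of both results.
    %res = fptosi <vscale x 2 x double> %a to <vscale x 2 x i32>
    ret <vscale x 2 x i32> %res
  }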