diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1675,58 +1675,28 @@ def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 PPR:$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))), (FCVT_ZPmZ_HtoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 (SVEAllActive):$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))), + (FCVT_ZPmZ_HtoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; // FP_ROUND has an additional 'precise' flag which indicates the type of rounding. // This is ignored by the pattern below where it is matched by (i64 timm0_1) def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 PPR:$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))), (FCVT_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 (SVEAllActive):$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))), + (FCVT_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - // Floating-point -> signed integer - def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg), - (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (nxv2f16 ZPR:$Zd))), - (SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv4f16 (AArch64scvtf_mt (nxv4i1 PPR:$Pg), - (sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (nxv4f16 ZPR:$Zd))), - (SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg), - (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f16 ZPR:$Zd))), - (SCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv2f32 (AArch64scvtf_mt (nxv2i1 PPR:$Pg), - (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f32 ZPR:$Zd))), - (SCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv2f64 (AArch64scvtf_mt (nxv2i1 PPR:$Pg), - (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f64 ZPR:$Zd))), - (SCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + defm : sve2_fp_convert_extend; + defm : sve2_fp_convert_extend; + defm : sve2_fp_convert_extend; + defm : sve2_fp_convert_extend; + defm : sve2_fp_convert_extend; // Floating-point -> unsigned integer - def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg), - (and (nxv2i64 ZPR:$Zs), - (nxv2i64 (AArch64dup (i64 0xFFFF)))), (nxv2f16 ZPR:$Zd))), - (UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg), - (and (nxv2i64 ZPR:$Zs), - (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f16 ZPR:$Zd))), - (UCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv4f16 (AArch64ucvtf_mt (nxv4i1 PPR:$Pg), - (and (nxv4i32 ZPR:$Zs), - (nxv4i32 (AArch64dup (i32 0xFFFF)))), (nxv4f16 ZPR:$Zd))), - (UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv2f32 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg), - (and (nxv2i64 ZPR:$Zs), - (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f32 ZPR:$Zd))), - (UCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv2f64 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg), - (and (nxv2i64 ZPR:$Zs), - (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f64 ZPR:$Zd))), - (UCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + defm : sve2_fp_convert_narrow; + defm : sve2_fp_convert_narrow; + defm : sve2_fp_convert_narrow; + defm : sve2_fp_convert_narrow; + defm : sve2_fp_convert_narrow; defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", AArch64frintn_mt>; defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", AArch64frintp_mt>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -372,6 +372,14 @@ : Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), vtd:$Op3)), (inst $Op3, $Op1, $Op2)>; +multiclass SVE_1_Op_PassthruUndef_Round_Pat{ + def : Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), (vtd undef))), + (inst (IMPLICIT_DEF), $Op1, $Op2)>; + def : Pat<(vtd (op (pg (SVEAllActive:$Op1)), vts:$Op2, (i64 timm0_1), vtd:$Op3)), + (inst $Op3, $Op1, $Op2)>; +} + class SVE_1_Op_Imm_OptLsl_Reverse_Pat : Pat<(vt (op (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))), (vt zprty:$Op1))), @@ -2572,14 +2580,40 @@ let ElementSize = Sz; } +multiclass sve2_fp_convert_extend { + def : Pat<(vtd (op (vtp PPR:$Pg), + (sext_inreg (vtint_extend ZPR:$Zs), vtint), (vtd ZPR:$Zd))), + (!cast(inst) ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + + def : Pat<(vtd (op (vtp (SVEAllActive):$Pg), + (sext_inreg (vtint_extend ZPR:$Zs), vtint), (vtd ZPR:$Zd))), + (!cast(inst # _UNDEF) ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; +} + +multiclass sve2_fp_convert_narrow { + def : Pat<(vtd (op (vtp PPR:$Pg), + (and (vtint_extend ZPR:$Zs), + (vtint_extend (AArch64dup imm))), (vtd ZPR:$Zd))), + (!cast(inst) ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + + def : Pat<(vtd (op (vtp (SVEAllActive):$Pg), + (and (vtint_extend ZPR:$Zs), (vtint_extend (AArch64dup imm))), + (vtd ZPR:$Zd))), + (!cast(inst # _UNDEF) ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; +} + multiclass sve_fp_2op_p_zd opc, string asm, RegisterOperand i_zprtype, RegisterOperand o_zprtype, SDPatternOperator int_op, SDPatternOperator ir_op, ValueType vt1, ValueType vt2, ValueType vt3, ElementSizeEnum Sz> { - def NAME : sve_fp_2op_p_zd; - + def NAME : sve_fp_2op_p_zd, + SVEPseudo2Instr; // convert vt1 to a packed type for the intrinsic patterns defvar packedvt1 = !cond(!eq(!cast(vt1), "nxv2f16"): nxv8f16, !eq(!cast(vt1), "nxv4f16"): nxv8f16, @@ -2594,7 +2628,9 @@ def : SVE_3_Op_Pat(NAME)>; + def _UNDEF : PredOneOpPassthruPseudo(i_zprtype)>; def : SVE_1_Op_Passthru_Pat(NAME)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF)>; } multiclass sve_fp_2op_p_zdr opc, string asm, @@ -2603,7 +2639,8 @@ SDPatternOperator int_op, SDPatternOperator ir_op, ValueType vt1, ValueType vt2, ValueType vt3, ElementSizeEnum Sz> { - def NAME : sve_fp_2op_p_zd; + def NAME : sve_fp_2op_p_zd, + SVEPseudo2Instr; // convert vt1 to a packed type for the intrinsic patterns defvar packedvt1 = !cond(!eq(!cast(vt1), "nxv2f16"): nxv8f16, @@ -2613,7 +2650,9 @@ def : SVE_3_Op_Pat(NAME)>; + def _UNDEF : PredOneOpPassthruPseudo(i_zprtype)>; def : SVE_1_Op_Passthru_Round_Pat(NAME)>; + defm : SVE_1_Op_PassthruUndef_Round_Pat(NAME # _UNDEF)>; } multiclass sve_fp_2op_p_zd_HSD opc, string asm, SDPatternOperator op> { diff --git a/llvm/test/CodeGen/AArch64/sve-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-fcvt.ll --- a/llvm/test/CodeGen/AArch64/sve-fcvt.ll +++ b/llvm/test/CodeGen/AArch64/sve-fcvt.ll @@ -898,3 +898,497 @@ %res = uitofp %a to ret %res } + +define @fcvt_htos_movprfx( %a, %b) { +; CHECK-LABEL: fcvt_htos_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvt z0.s, p0/m, z1.h +; CHECK-NEXT: ret + %res = fpext %b to + ret %res +} + +define @fcvt_htod_movprfx( %a, %b) { +; CHECK-LABEL: fcvt_htod_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvt z0.d, p0/m, z1.h +; CHECK-NEXT: ret + %res = fpext %b to + ret %res +} + +define @fcvt_stod_movprfx( %a, %b) { +; CHECK-LABEL: fcvt_stod_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvt z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %res = fpext %b to + ret %res +} + +define @fcvt_stoh_movprfx( %a, %b) { +; CHECK-LABEL: fcvt_stoh_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvt z0.h, p0/m, z1.s +; CHECK-NEXT: ret + %res = fptrunc %b to + ret %res +} + +define @fcvt_dtoh_movprfx( %a, %b) { +; CHECK-LABEL: fcvt_dtoh_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvt z0.h, p0/m, z1.d +; CHECK-NEXT: ret + %res = fptrunc %b to + ret %res +} + +define @fcvt_dtos_movprfx( %a, %b) { +; CHECK-LABEL: fcvt_dtos_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvt z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %res = fptrunc %b to + ret %res +} + +define @scvtf_htoh_movprfx( %a, %b) { +; CHECK-LABEL: scvtf_htoh_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: scvtf z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %res = sitofp %b to + ret %res +} + +define @scvtf_stos_movprfx( %a, %b) { +; CHECK-LABEL: scvtf_stos_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: scvtf z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %res = sitofp %b to + ret %res +} + +define @scvtf_stod_movprfx( %a, %b) { +; CHECK-LABEL: scvtf_stod_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: scvtf z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %res = sitofp %b to + ret %res +} + +define @scvtf_dtos_movprfx( %a, %b) { +; CHECK-LABEL: scvtf_dtos_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: scvtf z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %res = sitofp %b to + ret %res +} + +define @scvtf_stoh_movprfx( %a, %b) { +; CHECK-LABEL: scvtf_stoh_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: scvtf z0.h, p0/m, z1.s +; CHECK-NEXT: ret + %res = sitofp %b to + ret %res +} + +define @scvtf_dtoh_movprfx( %a, %b) { +; CHECK-LABEL: scvtf_dtoh_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: scvtf z0.h, p0/m, z1.d +; CHECK-NEXT: ret + %res = sitofp %b to + ret %res +} + +define @scvtf_dtod_movprfx( %a, %b) { +; CHECK-LABEL: scvtf_dtod_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: scvtf z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %res = sitofp %b to + ret %res +} + +define @ucvtf_stos_movprfx( %a, %b) { +; CHECK-LABEL: ucvtf_stos_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: ucvtf z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %res = uitofp %b to + ret %res +} + +define @ucvtf_htoh_movprfx( %a, %b) { +; CHECK-LABEL: ucvtf_htoh_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: ucvtf z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %res = uitofp %b to + ret %res +} + +define @ucvtf_stod_movprfx( %a, %b) { +; CHECK-LABEL: ucvtf_stod_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: ucvtf z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %res = uitofp %b to + ret %res +} + +define @ucvtf_stoh_movprfx( %a, %b) { +; CHECK-LABEL: ucvtf_stoh_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: ucvtf z0.h, p0/m, z1.s +; CHECK-NEXT: ret + %res = uitofp %b to + ret %res +} + +define @ucvtf_dtos_movprfx( %a, %b) { +; CHECK-LABEL: ucvtf_dtos_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: ucvtf z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %res = uitofp %b to + ret %res +} + +define @ucvtf_dtoh_movprfx( %a, %b) { +; CHECK-LABEL: ucvtf_dtoh_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: ucvtf z0.h, p0/m, z1.d +; CHECK-NEXT: ret + %res = uitofp %b to + ret %res +} + +define @ucvtf_dtod_movprfx( %a, %b) { +; CHECK-LABEL: ucvtf_dtod_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: ucvtf z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %res = uitofp %b to + ret %res +} + +define @fcvtzs_htoh_movprfx( %a, %b) { +; CHECK-LABEL: fcvtzs_htoh_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvtzs z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %res = fptosi %b to + ret %res +} + +define @fcvtzs_stos_movprfx( %a, %b) { +; CHECK-LABEL: fcvtzs_stos_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvtzs z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %res = fptosi %b to + ret %res +} + +define @fcvtzs_dtos_movprfx( %a, %b) { +; CHECK-LABEL: fcvtzs_dtos_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %res = fptosi %b to + ret %res +} + +define @fcvtzs_stod_movprfx( %a, %b) { +; CHECK-LABEL: fcvtzs_stod_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %res = fptosi %b to + ret %res +} + +define @fcvtzs_htos_movprfx( %a, %b) { +; CHECK-LABEL: fcvtzs_htos_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvtzs z0.s, p0/m, z1.h +; CHECK-NEXT: ret + %res = fptosi %b to + ret %res +} + +define @fcvtzs_htod_movprfx( %a, %b) { +; CHECK-LABEL: fcvtzs_htod_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.h +; CHECK-NEXT: ret + %res = fptosi %b to + ret %res +} + +define @fcvtzs_dtod_movprfx( %a, %b) { +; CHECK-LABEL: fcvtzs_dtod_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %res = fptosi %b to + ret %res +} + +define @fcvtzu_htoh_movprfx( %a, %b) { +; CHECK-LABEL: fcvtzu_htoh_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvtzu z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %res = fptoui %b to + ret %res +} + +define @fcvtzu_stos_movprfx( %a, %b) { +; CHECK-LABEL: fcvtzu_stos_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvtzu z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %res = fptoui %b to + ret %res +} + +define @fcvtzu_dtos_movprfx( %a, %b) { +; CHECK-LABEL: fcvtzu_dtos_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %res = fptoui %b to + ret %res +} + +define @fcvtzu_stod_movprfx( %a, %b) { +; CHECK-LABEL: fcvtzu_stod_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %res = fptoui %b to + ret %res +} + +define @fcvtzu_htos_movprfx( %a, %b) { +; CHECK-LABEL: fcvtzu_htos_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvtzu z0.s, p0/m, z1.h +; CHECK-NEXT: ret + %res = fptoui %b to + ret %res +} + +define @fcvtzu_htod_movprfx( %a, %b) { +; CHECK-LABEL: fcvtzu_htod_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.h +; CHECK-NEXT: ret + %res = fptoui %b to + ret %res +} + +define @fcvtzu_dtod_movprfx( %a, %b) { +; CHECK-LABEL: fcvtzu_dtod_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %res = fptoui %b to + ret %res +} + +define @scvtf_nxv2f16_htoh( %a, %pg, %b) { +; CHECK-LABEL: scvtf_nxv2f16_htoh: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %ext = sext %b to + %res = call @llvm.aarch64.sve.scvtf.nxv2f16( %a, %pg, %ext) + ret %res +} + + +define @scvtf_nxv4f16_htoh( %a, %pg, %b) { +; CHECK-LABEL: scvtf_nxv4f16_htoh: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %ext = sext %b to + %res = call @llvm.aarch64.sve.scvtf.nxv4f16( %a, %pg, %ext) + ret %res +} + +define @scvtf_nxv2f16_stoh( %a, %pg, %b) { +; CHECK-LABEL: scvtf_nxv2f16_stoh: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf z0.h, p0/m, z1.s +; CHECK-NEXT: ret + %ext = sext %b to + %res = call @llvm.aarch64.sve.scvtf.nxv2f16( %a, %pg, %ext) + ret %res +} + +define @scvtf_nxv2f32_stos( %a, %pg, %b) { +; CHECK-LABEL: scvtf_nxv2f32_stos: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %ext = sext %b to + %res = call @llvm.aarch64.sve.scvtf.nxv2f32( %a, %pg, %ext) + ret %res +} + +define @scvtf_nxv2f64_stod( %a, %pg, %b) { +; CHECK-LABEL: scvtf_nxv2f64_stod: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %ext = sext %b to + %res = call @llvm.aarch64.sve.scvtf.nxv2f64( %a, %pg, %ext) + ret %res +} + +define @ucvtf_nxv2f16_htoh( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_nxv2f16_htoh: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %ins = insertelement poison, i64 65535, i32 0 + %splat = shufflevector %ins, poison, zeroinitializer + %and = and %b, %splat + %res = call @llvm.aarch64.sve.ucvtf.nxv2f16( %a, %pg, %and) + ret %res +} + +define @ucvtf_nxv2f16_stoh( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_nxv2f16_stoh: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf z0.h, p0/m, z1.s +; CHECK-NEXT: ret + %ins = insertelement poison, i64 4294967295, i32 0 + %splat = shufflevector %ins, poison, zeroinitializer + %and = and %b, %splat + %res = call @llvm.aarch64.sve.ucvtf.nxv2f16( %a, %pg, %and) + ret %res +} + +define @ucvtf_nxv4f16_htoh( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_nxv4f16_htoh: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %ins = insertelement poison, i32 65535, i32 0 + %splat = shufflevector %ins, poison, zeroinitializer + %and = and %b, %splat + %res = call @llvm.aarch64.sve.ucvtf.nxv4f16( %a, %pg, %and) + ret %res +} + +define @ucvtf_nxv2f32_stos( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_nxv2f32_stos: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %ins = insertelement poison, i64 4294967295, i32 0 + %splat = shufflevector %ins, poison, zeroinitializer + %and = and %b, %splat + %res = call @llvm.aarch64.sve.ucvtf.nxv2f32( %a, %pg, %and) + ret %res +} + +define @ucvtf_nxv2f64_stod( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_nxv2f64_stod: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %ins = insertelement poison, i64 4294967295, i32 0 + %splat = shufflevector %ins, poison, zeroinitializer + %and = and %b, %splat + %res = call @llvm.aarch64.sve.ucvtf.nxv2f64( %a, %pg, %and) + ret %res +} + +declare @llvm.aarch64.sve.scvtf.nxv2f16(, , ) +declare @llvm.aarch64.sve.scvtf.nxv4f16(, , ) +declare @llvm.aarch64.sve.scvtf.nxv2f32(, , ) +declare @llvm.aarch64.sve.scvtf.nxv2f64(, , ) +declare @llvm.aarch64.sve.ucvtf.nxv2f16(, , ) +declare @llvm.aarch64.sve.ucvtf.nxv4f16(, , ) +declare @llvm.aarch64.sve.ucvtf.nxv2f32(, , ) +declare @llvm.aarch64.sve.ucvtf.nxv2f64(, , ) diff --git a/llvm/test/CodeGen/AArch64/sve-fpext-load.ll b/llvm/test/CodeGen/AArch64/sve-fpext-load.ll --- a/llvm/test/CodeGen/AArch64/sve-fpext-load.ll +++ b/llvm/test/CodeGen/AArch64/sve-fpext-load.ll @@ -23,7 +23,9 @@ ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: movprfx z0, z1 ; CHECK-NEXT: fcvt z0.d, p0/m, z1.h +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: fcvt z1.d, p0/m, z2.h ; CHECK-NEXT: ret %load = load , * %ptr, align 4 @@ -43,10 +45,13 @@ ; CHECK-NEXT: uunpklo z2.d, z1.s ; CHECK-NEXT: uunpkhi z1.d, z1.s ; CHECK-NEXT: uunpklo z3.d, z0.s +; CHECK-NEXT: fcvt z1.d, p0/m, z1.h ; CHECK-NEXT: uunpkhi z4.d, z0.s +; CHECK-NEXT: movprfx z0, z2 ; CHECK-NEXT: fcvt z0.d, p0/m, z2.h -; CHECK-NEXT: fcvt z1.d, p0/m, z1.h +; CHECK-NEXT: movprfx z2, z3 ; CHECK-NEXT: fcvt z2.d, p0/m, z3.h +; CHECK-NEXT: movprfx z3, z4 ; CHECK-NEXT: fcvt z3.d, p0/m, z4.h ; CHECK-NEXT: ret %load = load , * %ptr, align 4 @@ -76,7 +81,9 @@ ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: movprfx z0, z1 ; CHECK-NEXT: fcvt z0.d, p0/m, z1.s +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: fcvt z1.d, p0/m, z2.s ; CHECK-NEXT: ret %load = load , * %ptr, align 4 diff --git a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll --- a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll @@ -6,10 +6,12 @@ define @fcvts_nxv8f16( %a) { ; CHECK-LABEL: fcvts_nxv8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: uunpklo z1.s, z0.h +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: uunpkhi z2.s, z0.h +; CHECK-NEXT: movprfx z0, z1 ; CHECK-NEXT: fcvt z0.s, p0/m, z1.h +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: fcvt z1.s, p0/m, z2.h ; CHECK-NEXT: ret %res = fpext %a to @@ -19,10 +21,12 @@ define @fcvtd_nxv4f16( %a) { ; CHECK-LABEL: fcvtd_nxv4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: movprfx z0, z1 ; CHECK-NEXT: fcvt z0.d, p0/m, z1.h +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: fcvt z1.d, p0/m, z2.h ; CHECK-NEXT: ret %res = fpext %a to @@ -33,15 +37,18 @@ ; CHECK-LABEL: fcvtd_nxv8f16: ; CHECK: // %bb.0: ; CHECK-NEXT: uunpklo z1.s, z0.h -; CHECK-NEXT: uunpkhi z0.s, z0.h ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z0.s, z0.h ; CHECK-NEXT: uunpklo z2.d, z1.s ; CHECK-NEXT: uunpkhi z1.d, z1.s ; CHECK-NEXT: uunpklo z3.d, z0.s +; CHECK-NEXT: fcvt z1.d, p0/m, z1.h ; CHECK-NEXT: uunpkhi z4.d, z0.s +; CHECK-NEXT: movprfx z0, z2 ; CHECK-NEXT: fcvt z0.d, p0/m, z2.h -; CHECK-NEXT: fcvt z1.d, p0/m, z1.h +; CHECK-NEXT: movprfx z2, z3 ; CHECK-NEXT: fcvt z2.d, p0/m, z3.h +; CHECK-NEXT: movprfx z3, z4 ; CHECK-NEXT: fcvt z3.d, p0/m, z4.h ; CHECK-NEXT: ret %res = fpext %a to @@ -51,10 +58,12 @@ define @fcvtd_nxv4f32( %a) { ; CHECK-LABEL: fcvtd_nxv4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: movprfx z0, z1 ; CHECK-NEXT: fcvt z0.d, p0/m, z1.s +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: fcvt z1.d, p0/m, z2.s ; CHECK-NEXT: ret %res = fpext %a to @@ -64,14 +73,18 @@ define @fcvtd_nxv8f32( %a) { ; CHECK-LABEL: fcvtd_nxv8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: uunpklo z2.d, z0.s ; CHECK-NEXT: uunpkhi z3.d, z0.s +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: uunpklo z4.d, z1.s ; CHECK-NEXT: uunpkhi z5.d, z1.s +; CHECK-NEXT: movprfx z0, z2 ; CHECK-NEXT: fcvt z0.d, p0/m, z2.s +; CHECK-NEXT: movprfx z1, z3 ; CHECK-NEXT: fcvt z1.d, p0/m, z3.s +; CHECK-NEXT: movprfx z2, z4 ; CHECK-NEXT: fcvt z2.d, p0/m, z4.s +; CHECK-NEXT: movprfx z3, z5 ; CHECK-NEXT: fcvt z3.d, p0/m, z5.s ; CHECK-NEXT: ret %res = fpext %a to @@ -182,10 +195,12 @@ define @fcvtzs_d_nxv4f32( %a) { ; CHECK-LABEL: fcvtzs_d_nxv4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: movprfx z0, z1 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.s +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.s ; CHECK-NEXT: ret %res = fptosi %a to @@ -195,14 +210,18 @@ define @fcvtzs_s_nxv16f16( %a) { ; CHECK-LABEL: fcvtzs_s_nxv16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: uunpklo z2.s, z0.h ; CHECK-NEXT: uunpkhi z3.s, z0.h +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: uunpklo z4.s, z1.h ; CHECK-NEXT: uunpkhi z5.s, z1.h +; CHECK-NEXT: movprfx z0, z2 ; CHECK-NEXT: fcvtzs z0.s, p0/m, z2.h +; CHECK-NEXT: movprfx z1, z3 ; CHECK-NEXT: fcvtzs z1.s, p0/m, z3.h +; CHECK-NEXT: movprfx z2, z4 ; CHECK-NEXT: fcvtzs z2.s, p0/m, z4.h +; CHECK-NEXT: movprfx z3, z5 ; CHECK-NEXT: fcvtzs z3.s, p0/m, z5.h ; CHECK-NEXT: ret %res = fptosi %a to @@ -228,10 +247,12 @@ define @fcvtzu_d_nxv4f32( %a) { ; CHECK-LABEL: fcvtzu_d_nxv4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: movprfx z0, z1 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.s +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: fcvtzu z1.d, p0/m, z2.s ; CHECK-NEXT: ret %res = fptoui %a to @@ -274,15 +295,18 @@ ; CHECK-LABEL: scvtf_s_nxv16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: sunpklo z1.h, z0.b -; CHECK-NEXT: sunpkhi z0.h, z0.b ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sunpkhi z0.h, z0.b ; CHECK-NEXT: sunpklo z2.s, z1.h ; CHECK-NEXT: sunpkhi z1.s, z1.h ; CHECK-NEXT: sunpklo z3.s, z0.h +; CHECK-NEXT: scvtf z1.s, p0/m, z1.s ; CHECK-NEXT: sunpkhi z4.s, z0.h +; CHECK-NEXT: movprfx z0, z2 ; CHECK-NEXT: scvtf z0.s, p0/m, z2.s -; CHECK-NEXT: scvtf z1.s, p0/m, z1.s +; CHECK-NEXT: movprfx z2, z3 ; CHECK-NEXT: scvtf z2.s, p0/m, z3.s +; CHECK-NEXT: movprfx z3, z4 ; CHECK-NEXT: scvtf z3.s, p0/m, z4.s ; CHECK-NEXT: ret %res = sitofp %a to @@ -292,10 +316,12 @@ define @scvtf_d_nxv4i32( %a) { ; CHECK-LABEL: scvtf_d_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: sunpklo z1.d, z0.s +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: sunpkhi z2.d, z0.s +; CHECK-NEXT: movprfx z0, z1 ; CHECK-NEXT: scvtf z0.d, p0/m, z1.d +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: scvtf z1.d, p0/m, z2.d ; CHECK-NEXT: ret %res = sitofp %a to @@ -352,10 +378,12 @@ define @ucvtf_d_nxv4i32( %a) { ; CHECK-LABEL: ucvtf_d_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: movprfx z0, z1 ; CHECK-NEXT: ucvtf z0.d, p0/m, z1.d +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: ucvtf z1.d, p0/m, z2.d ; CHECK-NEXT: ret %res = uitofp %a to