diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1683,60 +1683,61 @@
   defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111110, "fcvtzs", ZPR64, ZPR64, null_frag, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
   defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
 
-  def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 PPR:$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))),
-            (FCVT_ZPmZ_HtoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+  // These patterns exist to improve the code quality of conversions on unpacked types.
+  def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 (SVEAllActive):$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))),
+            (FCVT_ZPmZ_HtoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
 
   // FP_ROUND has an additional 'precise' flag which indicates the type of rounding.
   // This is ignored by the pattern below where it is matched by (i64 timm0_1)
-  def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 PPR:$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))),
-            (FCVT_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+  def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 (SVEAllActive):$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))),
+            (FCVT_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
 
-  // Floating-point -> signed integer
-  def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
+  // Signed integer -> Floating-point
+  def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg),
                       (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (nxv2f16 ZPR:$Zd))),
-            (SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+            (SCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
 
-  def : Pat<(nxv4f16 (AArch64scvtf_mt (nxv4i1 PPR:$Pg),
+  def : Pat<(nxv4f16 (AArch64scvtf_mt (nxv4i1 (SVEAllActive):$Pg),
                       (sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (nxv4f16 ZPR:$Zd))),
-            (SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+            (SCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
 
-  def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
+  def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg),
                       (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f16 ZPR:$Zd))),
-            (SCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+            (SCVTF_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
 
-  def : Pat<(nxv2f32 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
+  def : Pat<(nxv2f32 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg),
                       (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f32 ZPR:$Zd))),
-            (SCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+            (SCVTF_ZPmZ_StoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
 
-  def : Pat<(nxv2f64 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
+  def : Pat<(nxv2f64 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg),
                       (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f64 ZPR:$Zd))),
-            (SCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+            (SCVTF_ZPmZ_StoD_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
 
-  // Floating-point -> unsigned integer
-  def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
+  // Unsigned integer -> Floating-point
+  def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg),
                       (and (nxv2i64 ZPR:$Zs),
                        (nxv2i64 (AArch64dup (i64 0xFFFF)))), (nxv2f16 ZPR:$Zd))),
-            (UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+            (UCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
 
-  def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
+  def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg),
                       (and (nxv2i64 ZPR:$Zs),
                        (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f16 ZPR:$Zd))),
-            (UCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+            (UCVTF_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
 
-  def : Pat<(nxv4f16 (AArch64ucvtf_mt (nxv4i1 PPR:$Pg),
+  def : Pat<(nxv4f16 (AArch64ucvtf_mt (nxv4i1 (SVEAllActive):$Pg),
                       (and (nxv4i32 ZPR:$Zs),
                        (nxv4i32 (AArch64dup (i32 0xFFFF)))), (nxv4f16 ZPR:$Zd))),
-            (UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+            (UCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
 
-  def : Pat<(nxv2f32 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
+  def : Pat<(nxv2f32 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg),
                       (and (nxv2i64 ZPR:$Zs),
                        (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f32 ZPR:$Zd))),
-            (UCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+            (UCVTF_ZPmZ_StoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
 
-  def : Pat<(nxv2f64 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
+  def : Pat<(nxv2f64 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg),
                       (and (nxv2i64 ZPR:$Zs),
                        (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f64 ZPR:$Zd))),
-            (UCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+            (UCVTF_ZPmZ_StoD_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
 
   defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", AArch64frintn_mt>;
   defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", AArch64frintp_mt>;
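The patterns above are what the new comment refers to: a conversion whose operands are unpacked (for example nxv2f16 to nxv2f32) and whose governing predicate is all active now selects the _UNDEF pseudo form, so the pseudo-expansion pass may place a movprfx in front of the destructive fcvt/scvtf/ucvtf instead of tying the destination to whichever register happens to hold the passthru. A minimal sketch of an input that exercises the first pattern (illustrative only, not part of the patch; the function name is hypothetical):

    ; Sketch: compile with llc -mtriple=aarch64 -mattr=+sve; the fpext is
    ; matched as AArch64fcvte_mt with an all-active predicate and selects
    ; FCVT_ZPmZ_HtoS_UNDEF.
    define <vscale x 2 x float> @fcvt_unpacked_sketch(<vscale x 2 x half> %v) {
      %r = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
      ret <vscale x 2 x float> %r
    }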
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -370,6 +370,14 @@
   : Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), vtd:$Op3)),
         (inst $Op3, $Op1, $Op2)>;
 
+multiclass SVE_1_Op_PassthruUndef_Round_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
+                                            ValueType vts, Instruction inst>{
+  def : Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), (vtd undef))),
+            (inst (IMPLICIT_DEF), $Op1, $Op2)>;
+  def : Pat<(vtd (op (pg (SVEAllActive:$Op1)), vts:$Op2, (i64 timm0_1), vtd:$Op3)),
+            (inst $Op3, $Op1, $Op2)>;
+}
+
 class SVE_1_Op_Imm_OptLsl_Reverse_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
                                       ValueType it, ComplexPattern cpx, Instruction inst>
   : Pat<(vt (op (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))), (vt zprty:$Op1))),
@@ -2600,8 +2608,8 @@
                            SDPatternOperator int_op, SDPatternOperator ir_op,
                            ValueType vt1, ValueType vt2, ValueType vt3,
                            ElementSizeEnum Sz> {
-  def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>;
-
+  def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>,
+             SVEPseudo2Instr<NAME, 1>;
   // convert vt1 to a packed type for the intrinsic patterns
   defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16,
                            !eq(!cast<string>(vt1), "nxv4f16"): nxv8f16,
@@ -2615,8 +2623,11 @@
                            1 : vt3);
 
   def : SVE_3_Op_Pat<packedvt1, int_op, nxv16i1, packedvt3, !cast<Instruction>(NAME)>;
-  def : SVE_1_Op_Passthru_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;
+
+  def _UNDEF : PredOneOpPassthruPseudo<NAME, !cast<ZPRRegOp>(i_zprtype)>;
+
+  defm : SVE_1_Op_PassthruUndef_Pat<vt1, ir_op, vt2, vt3, !cast<Pseudo>(NAME # _UNDEF)>;
 }
 
 multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm,
@@ -2625,7 +2636,8 @@
                            SDPatternOperator int_op, SDPatternOperator ir_op,
                            ValueType vt1, ValueType vt2, ValueType vt3,
                            ElementSizeEnum Sz> {
-  def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>;
+  def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>,
+             SVEPseudo2Instr<NAME, 1>;
 
   // convert vt1 to a packed type for the intrinsic patterns
   defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16,
@@ -2634,8 +2646,11 @@
                            1 : vt1);
 
   def : SVE_3_Op_Pat<packedvt1, int_op, nxv16i1, vt3, !cast<Instruction>(NAME)>;
-  def : SVE_1_Op_Passthru_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;
+
+  def _UNDEF : PredOneOpPassthruPseudo<NAME, !cast<ZPRRegOp>(i_zprtype)>;
+
+  defm : SVE_1_Op_PassthruUndef_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Pseudo>(NAME # _UNDEF)>;
 }
 
 multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> {
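SVE_1_Op_PassthruUndef_Round_Pat mirrors the existing non-Round variant: its first pattern matches an undef passthru and feeds the pseudo an IMPLICIT_DEF, while the second keeps a real passthru when the governing predicate is all active; the (i64 timm0_1) operand swallows the FP_ROUND 'precise' flag as before. A plain fptrunc reaches the first pattern, as sketched here (illustrative only, not part of the patch; the expected assembly is the movprfx + fcvt pair checked by fcvt_stoh_movprfx in the test diff below):

    ; Sketch: an undef-passthru FP_ROUND that selects FCVT_ZPmZ_StoH_UNDEF.
    define <vscale x 4 x half> @fptrunc_sketch(<vscale x 4 x float> %v) {
      %r = fptrunc <vscale x 4 x float> %v to <vscale x 4 x half>
      ret <vscale x 4 x half> %r
    }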
diff --git a/llvm/test/CodeGen/AArch64/sve-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-fcvt.ll
--- a/llvm/test/CodeGen/AArch64/sve-fcvt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fcvt.ll
@@ -898,3 +898,377 @@
   %res = uitofp <vscale x 2 x i64> %a to <vscale x 2 x double>
   ret <vscale x 2 x double> %res
 }
+
+define <vscale x 4 x float> @fcvt_htos_movprfx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
+; CHECK-LABEL: fcvt_htos_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fcvt z0.s, p0/m, z1.h
+; CHECK-NEXT:    ret
+  %res = fpext <vscale x 4 x half> %b to <vscale x 4 x float>
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @fcvt_htod_movprfx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
+; CHECK-LABEL: fcvt_htod_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fcvt z0.d, p0/m, z1.h
+; CHECK-NEXT:    ret
+  %res = fpext <vscale x 2 x half> %b to <vscale x 2 x double>
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 2 x double> @fcvt_stod_movprfx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
+; CHECK-LABEL: fcvt_stod_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fcvt z0.d, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %res = fpext <vscale x 2 x float> %b to <vscale x 2 x double>
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 4 x half> @fcvt_stoh_movprfx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fcvt_stoh_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fcvt z0.h, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %res = fptrunc <vscale x 4 x float> %b to <vscale x 4 x half>
+  ret <vscale x 4 x half> %res
+}
+
+define <vscale x 2 x half> @fcvt_dtoh_movprfx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fcvt_dtoh_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fcvt z0.h, p0/m, z1.d
+; CHECK-NEXT:    ret
+  %res = fptrunc <vscale x 2 x double> %b to <vscale x 2 x half>
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 2 x float> @fcvt_dtos_movprfx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fcvt_dtos_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fcvt z0.s, p0/m, z1.d
+; CHECK-NEXT:    ret
+  %res = fptrunc <vscale x 2 x double> %b to <vscale x 2 x float>
+  ret <vscale x 2 x float> %res
+}
+
+define <vscale x 8 x half> @scvtf_htoh_movprfx(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: scvtf_htoh_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    scvtf z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+  %res = sitofp <vscale x 8 x i16> %b to <vscale x 8 x half>
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @scvtf_stos_movprfx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: scvtf_stos_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    scvtf z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %res = sitofp <vscale x 4 x i32> %b to <vscale x 4 x float>
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @scvtf_stod_movprfx(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: scvtf_stod_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    scvtf z0.d, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %res = sitofp <vscale x 2 x i32> %b to <vscale x 2 x double>
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 2 x float> @scvtf_dtos_movprfx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: scvtf_dtos_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    scvtf z0.s, p0/m, z1.d
+; CHECK-NEXT:    ret
+  %res = sitofp <vscale x 2 x i64> %b to <vscale x 2 x float>
+  ret <vscale x 2 x float> %res
+}
+
+define <vscale x 4 x half> @scvtf_stoh_movprfx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: scvtf_stoh_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    scvtf z0.h, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %res = sitofp <vscale x 4 x i32> %b to <vscale x 4 x half>
+  ret <vscale x 4 x half> %res
+}
+
+define <vscale x 2 x half> @scvtf_dtoh_movprfx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: scvtf_dtoh_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    scvtf z0.h, p0/m, z1.d
+; CHECK-NEXT:    ret
+  %res = sitofp <vscale x 2 x i64> %b to <vscale x 2 x half>
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 2 x double> @scvtf_dtod_movprfx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: scvtf_dtod_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    scvtf z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+  %res = sitofp <vscale x 2 x i64> %b to <vscale x 2 x double>
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 4 x float> @ucvtf_stos_movprfx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: ucvtf_stos_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    ucvtf z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %res = uitofp <vscale x 4 x i32> %b to <vscale x 4 x float>
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 8 x half> @ucvtf_htoh_movprfx(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: ucvtf_htoh_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+  %res = uitofp <vscale x 8 x i16> %b to <vscale x 8 x half>
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 2 x double> @ucvtf_stod_movprfx(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: ucvtf_stod_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    ucvtf z0.d, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %res = uitofp <vscale x 2 x i32> %b to <vscale x 2 x double>
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 4 x half> @ucvtf_stoh_movprfx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: ucvtf_stoh_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %res = uitofp <vscale x 4 x i32> %b to <vscale x 4 x half>
+  ret <vscale x 4 x half> %res
+}
+
+define <vscale x 2 x float> @ucvtf_dtos_movprfx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: ucvtf_dtos_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    ucvtf z0.s, p0/m, z1.d
+; CHECK-NEXT:    ret
+  %res = uitofp <vscale x 2 x i64> %b to <vscale x 2 x float>
+  ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x half> @ucvtf_dtoh_movprfx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: ucvtf_dtoh_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z1.d
+; CHECK-NEXT:    ret
+  %res = uitofp <vscale x 2 x i64> %b to <vscale x 2 x half>
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 2 x double> @ucvtf_dtod_movprfx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: ucvtf_dtod_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    ucvtf z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+  %res = uitofp <vscale x 2 x i64> %b to <vscale x 2 x double>
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x i16> @fcvtzs_htoh_movprfx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fcvtzs_htoh_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fcvtzs z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 8 x half> %b to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @fcvtzs_stos_movprfx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fcvtzs_stos_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fcvtzs z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 4 x float> %b to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i32> @fcvtzs_dtos_movprfx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fcvtzs_dtos_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 2 x double> %b to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %res
+}
+
+define <vscale x 2 x i64> @fcvtzs_stod_movprfx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
+; CHECK-LABEL: fcvtzs_stod_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 2 x float> %b to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 4 x i32> @fcvtzs_htos_movprfx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
+; CHECK-LABEL: fcvtzs_htos_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fcvtzs z0.s, p0/m, z1.h
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 4 x half> %b to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @fcvtzs_htod_movprfx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
+; CHECK-LABEL: fcvtzs_htod_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z1.h
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 2 x half> %b to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 2 x i64> @fcvtzs_dtod_movprfx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fcvtzs_dtod_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 2 x double> %b to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 8 x i16> @fcvtzu_htoh_movprfx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fcvtzu_htoh_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fcvtzu z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 8 x half> %b to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @fcvtzu_stos_movprfx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fcvtzu_stos_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fcvtzu z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 4 x float> %b to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i32> @fcvtzu_dtos_movprfx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fcvtzu_dtos_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 2 x double> %b to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %res
+}
+
+define <vscale x 2 x i64> @fcvtzu_stod_movprfx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
+; CHECK-LABEL: fcvtzu_stod_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 2 x float> %b to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 4 x i32> @fcvtzu_htos_movprfx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
+; CHECK-LABEL: fcvtzu_htos_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fcvtzu z0.s, p0/m, z1.h
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 4 x half> %b to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @fcvtzu_htod_movprfx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
+; CHECK-LABEL: fcvtzu_htod_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z1.h
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 2 x half> %b to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 2 x i64> @fcvtzu_dtod_movprfx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fcvtzu_dtod_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 2 x double> %b to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/AArch64/sve-fpext-load.ll b/llvm/test/CodeGen/AArch64/sve-fpext-load.ll
--- a/llvm/test/CodeGen/AArch64/sve-fpext-load.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fpext-load.ll
@@ -23,7 +23,9 @@
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
 ; CHECK-NEXT:    uunpkhi z2.d, z0.s
+; CHECK-NEXT:    movprfx z0, z1
 ; CHECK-NEXT:    fcvt z0.d, p0/m, z1.h
+; CHECK-NEXT:    movprfx z1, z2
 ; CHECK-NEXT:    fcvt z1.d, p0/m, z2.h
 ; CHECK-NEXT:    ret
   %load = load <vscale x 4 x half>, <vscale x 4 x half>* %ptr, align 4
@@ -43,10 +45,13 @@
 ; CHECK-NEXT:    uunpklo z2.d, z1.s
 ; CHECK-NEXT:    uunpkhi z1.d, z1.s
 ; CHECK-NEXT:    uunpklo z3.d, z0.s
+; CHECK-NEXT:    fcvt z1.d, p0/m, z1.h
 ; CHECK-NEXT:    uunpkhi z4.d, z0.s
+; CHECK-NEXT:    movprfx z0, z2
 ; CHECK-NEXT:    fcvt z0.d, p0/m, z2.h
-; CHECK-NEXT:    fcvt z1.d, p0/m, z1.h
+; CHECK-NEXT:    movprfx z2, z3
 ; CHECK-NEXT:    fcvt z2.d, p0/m, z3.h
+; CHECK-NEXT:    movprfx z3, z4
 ; CHECK-NEXT:    fcvt z3.d, p0/m, z4.h
 ; CHECK-NEXT:    ret
   %load = load <vscale x 8 x half>, <vscale x 8 x half>* %ptr, align 4
@@ -76,7 +81,9 @@
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
 ; CHECK-NEXT:    uunpkhi z2.d, z0.s
+; CHECK-NEXT:    movprfx z0, z1
 ; CHECK-NEXT:    fcvt z0.d, p0/m, z1.s
+; CHECK-NEXT:    movprfx z1, z2
 ; CHECK-NEXT:    fcvt z1.d, p0/m, z2.s
 ; CHECK-NEXT:    ret
   %load = load <vscale x 4 x float>, <vscale x 4 x float>* %ptr, align 4
diff --git a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll
--- a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll
@@ -6,10 +6,12 @@
 define <vscale x 8 x float> @fcvts_nxv8f16(<vscale x 8 x half> %a) {
 ; CHECK-LABEL: fcvts_nxv8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    uunpklo z1.s, z0.h
+; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    uunpkhi z2.s, z0.h
+; CHECK-NEXT:    movprfx z0, z1
 ; CHECK-NEXT:    fcvt z0.s, p0/m, z1.h
+; CHECK-NEXT:    movprfx z1, z2
 ; CHECK-NEXT:    fcvt z1.s, p0/m, z2.h
 ; CHECK-NEXT:    ret
   %res = fpext <vscale x 8 x half> %a to <vscale x 8 x float>
@@ -19,10 +21,12 @@
 define <vscale x 4 x double> @fcvtd_nxv4f16(<vscale x 4 x half> %a) {
 ; CHECK-LABEL: fcvtd_nxv4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    uunpkhi z2.d, z0.s
+; CHECK-NEXT:    movprfx z0, z1
 ; CHECK-NEXT:    fcvt z0.d, p0/m, z1.h
+; CHECK-NEXT:    movprfx z1, z2
 ; CHECK-NEXT:    fcvt z1.d, p0/m, z2.h
 ; CHECK-NEXT:    ret
   %res = fpext <vscale x 4 x half> %a to <vscale x 4 x double>
@@ -33,15 +37,18 @@
 ; CHECK-LABEL: fcvtd_nxv8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uunpklo z1.s, z0.h
-; CHECK-NEXT:    uunpkhi z0.s, z0.h
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z2.d, z1.s
 ; CHECK-NEXT:    uunpkhi z1.d, z1.s
 ; CHECK-NEXT:    uunpklo z3.d, z0.s
+; CHECK-NEXT:    fcvt z1.d, p0/m, z1.h
 ; CHECK-NEXT:    uunpkhi z4.d, z0.s
+; CHECK-NEXT:    movprfx z0, z2
 ; CHECK-NEXT:    fcvt z0.d, p0/m, z2.h
-; CHECK-NEXT:    fcvt z1.d, p0/m, z1.h
+; CHECK-NEXT:    movprfx z2, z3
 ; CHECK-NEXT:    fcvt z2.d, p0/m, z3.h
+; CHECK-NEXT:    movprfx z3, z4
 ; CHECK-NEXT:    fcvt z3.d, p0/m, z4.h
 ; CHECK-NEXT:    ret
   %res = fpext <vscale x 8 x half> %a to <vscale x 8 x double>
@@ -51,10 +58,12 @@
 define <vscale x 4 x double> @fcvtd_nxv4f32(<vscale x 4 x float> %a) {
 ; CHECK-LABEL: fcvtd_nxv4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    uunpkhi z2.d, z0.s
+; CHECK-NEXT:    movprfx z0, z1
 ; CHECK-NEXT:    fcvt z0.d, p0/m, z1.s
+; CHECK-NEXT:    movprfx z1, z2
 ; CHECK-NEXT:    fcvt z1.d, p0/m, z2.s
 ; CHECK-NEXT:    ret
   %res = fpext <vscale x 4 x float> %a to <vscale x 4 x double>
@@ -64,14 +73,18 @@
 define <vscale x 8 x double> @fcvtd_nxv8f32(<vscale x 8 x float> %a) {
 ; CHECK-LABEL: fcvtd_nxv8f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    uunpklo z2.d, z0.s
 ; CHECK-NEXT:    uunpkhi z3.d, z0.s
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    uunpklo z4.d, z1.s
 ; CHECK-NEXT:    uunpkhi z5.d, z1.s
+; CHECK-NEXT:    movprfx z0, z2
 ; CHECK-NEXT:    fcvt z0.d, p0/m, z2.s
+; CHECK-NEXT:    movprfx z1, z3
 ; CHECK-NEXT:    fcvt z1.d, p0/m, z3.s
+; CHECK-NEXT:    movprfx z2, z4
 ; CHECK-NEXT:    fcvt z2.d, p0/m, z4.s
+; CHECK-NEXT:    movprfx z3, z5
 ; CHECK-NEXT:    fcvt z3.d, p0/m, z5.s
 ; CHECK-NEXT:    ret
   %res = fpext <vscale x 8 x float> %a to <vscale x 8 x double>
@@ -182,10 +195,12 @@
 define <vscale x 4 x i64> @fcvtzs_d_nxv4f32(<vscale x 4 x float> %a) {
 ; CHECK-LABEL: fcvtzs_d_nxv4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    uunpkhi z2.d, z0.s
+; CHECK-NEXT:    movprfx z0, z1
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z1.s
+; CHECK-NEXT:    movprfx z1, z2
 ; CHECK-NEXT:    fcvtzs z1.d, p0/m, z2.s
 ; CHECK-NEXT:    ret
   %res = fptosi <vscale x 4 x float> %a to <vscale x 4 x i64>
@@ -195,14 +210,18 @@
 define <vscale x 16 x i32> @fcvtzs_s_nxv16f16(<vscale x 16 x half> %a) {
 ; CHECK-LABEL: fcvtzs_s_nxv16f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    uunpklo z2.s, z0.h
 ; CHECK-NEXT:    uunpkhi z3.s, z0.h
+; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    uunpklo z4.s, z1.h
 ; CHECK-NEXT:    uunpkhi z5.s, z1.h
+; CHECK-NEXT:    movprfx z0, z2
 ; CHECK-NEXT:    fcvtzs z0.s, p0/m, z2.h
+; CHECK-NEXT:    movprfx z1, z3
 ; CHECK-NEXT:    fcvtzs z1.s, p0/m, z3.h
+; CHECK-NEXT:    movprfx z2, z4
 ; CHECK-NEXT:    fcvtzs z2.s, p0/m, z4.h
+; CHECK-NEXT:    movprfx z3, z5
 ; CHECK-NEXT:    fcvtzs z3.s, p0/m, z5.h
 ; CHECK-NEXT:    ret
   %res = fptosi <vscale x 16 x half> %a to <vscale x 16 x i32>
@@ -228,10 +247,12 @@
 define <vscale x 4 x i64> @fcvtzu_d_nxv4f32(<vscale x 4 x float> %a) {
 ; CHECK-LABEL: fcvtzu_d_nxv4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    uunpkhi z2.d, z0.s
+; CHECK-NEXT:    movprfx z0, z1
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z1.s
+; CHECK-NEXT:    movprfx z1, z2
 ; CHECK-NEXT:    fcvtzu z1.d, p0/m, z2.s
 ; CHECK-NEXT:    ret
   %res = fptoui <vscale x 4 x float> %a to <vscale x 4 x i64>
@@ -274,15 +295,18 @@
 ; CHECK-LABEL: scvtf_s_nxv16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sunpklo z1.h, z0.b
-; CHECK-NEXT:    sunpkhi z0.h, z0.b
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    sunpkhi z0.h, z0.b
 ; CHECK-NEXT:    sunpklo z2.s, z1.h
 ; CHECK-NEXT:    sunpkhi z1.s, z1.h
 ; CHECK-NEXT:    sunpklo z3.s, z0.h
+; CHECK-NEXT:    scvtf z1.s, p0/m, z1.s
 ; CHECK-NEXT:    sunpkhi z4.s, z0.h
+; CHECK-NEXT:    movprfx z0, z2
 ; CHECK-NEXT:    scvtf z0.s, p0/m, z2.s
-; CHECK-NEXT:    scvtf z1.s, p0/m, z1.s
+; CHECK-NEXT:    movprfx z2, z3
 ; CHECK-NEXT:    scvtf z2.s, p0/m, z3.s
+; CHECK-NEXT:    movprfx z3, z4
 ; CHECK-NEXT:    scvtf z3.s, p0/m, z4.s
 ; CHECK-NEXT:    ret
   %res = sitofp <vscale x 16 x i8> %a to <vscale x 16 x float>
@@ -292,10 +316,12 @@
 define <vscale x 4 x double> @scvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: scvtf_d_nxv4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    sunpklo z1.d, z0.s
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    sunpkhi z2.d, z0.s
+; CHECK-NEXT:    movprfx z0, z1
 ; CHECK-NEXT:    scvtf z0.d, p0/m, z1.d
+; CHECK-NEXT:    movprfx z1, z2
 ; CHECK-NEXT:    scvtf z1.d, p0/m, z2.d
 ; CHECK-NEXT:    ret
   %res = sitofp <vscale x 4 x i32> %a to <vscale x 4 x double>
@@ -352,10 +378,12 @@
 define <vscale x 4 x double> @ucvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: ucvtf_d_nxv4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    uunpkhi z2.d, z0.s
+; CHECK-NEXT:    movprfx z0, z1
 ; CHECK-NEXT:    ucvtf z0.d, p0/m, z1.d
+; CHECK-NEXT:    movprfx z1, z2
 ; CHECK-NEXT:    ucvtf z1.d, p0/m, z2.d
 ; CHECK-NEXT:    ret
   %res = uitofp <vscale x 4 x i32> %a to <vscale x 4 x double>