Diff 405617

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Show First 20 Lines • Show All 1,667 Lines • ▼ Show 20 Lines	let Predicates = [HasSVEorStreamingSVE] in {
defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>;		defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>;
defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>;		defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>;
defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>;		defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>;
defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>;		defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>;
defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>;		defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>;
defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111110, "fcvtzs", ZPR64, ZPR64, null_frag, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;		defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111110, "fcvtzs", ZPR64, ZPR64, null_frag, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;		defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;

def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 PPR:$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))),		def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 (SVEAllActive):$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))),
		sdesmalenUnsubmitted Not Done Reply Inline Actions Shouldn't we retain the original pattern in case $Pg is not all active? sdesmalen: Shouldn't we retain the original pattern in case $Pg is not all active?
		sdesmalenUnsubmitted Not Done Reply Inline Actions Ah I see this question was already answered by @peterwaller-arm. sdesmalen: Ah I see this question was already answered by @peterwaller-arm.
(FCVT_ZPmZ_HtoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;		(FCVT_ZPmZ_HtoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
		peterwaller-armUnsubmitted Not Done Reply Inline Actions I believe this pattern should have the same tweak as discussed below -- `SVEAllActive` and `FCVT_ZPmZ_HtoS` => `FCVT_ZPmZ_HtoS_UNDEF`. peterwaller-arm: I believe this pattern should have the same tweak as discussed below -- `SVEAllActive` and…

// FP_ROUND has an additional 'precise' flag which indicates the type of rounding.		// FP_ROUND has an additional 'precise' flag which indicates the type of rounding.
// This is ignored by the pattern below where it is matched by (i64 timm0_1)		// This is ignored by the pattern below where it is matched by (i64 timm0_1)
def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 PPR:$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))),		def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 (SVEAllActive):$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))),
(FCVT_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;		(FCVT_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
		peterwaller-armUnsubmitted Not Done Reply Inline Actions The change you're making is adding the condition that `$Pg` must be `(SVEAllActive)`. This has the effect of making these patterns more specific than the patterns you are introducing in `sve_fp_2op_p_zd{,r}`, so 👍, they still match when they are needed, for example in `llvm/test/CodeGen/AArch64/sve-fcvt.ll`, to avoid introducing sign extension. If I'm not mistaken, `(SVEAllActive):$Pg` is the case where the `_UNDEF` pseudos are applicable. UNDEF in this case means that, since the predicate is all active, it's fine if the inactive lanes are undef (since they're unused), and pseudo expansion can do its thing (and materialize a movprfx). Therefore, for these patterns you have modified, you want to switch to the `_UNDEF` form: `FCVT_ZPmZ_StoH` => `FCVT_ZPmZ_StoH_UNDEF`. This fixes the `scvtf_stod_movprfx` case. One thing to beware of is that you're introducing the constraint that the `$Pg` is all true. This implies there are some cases where this may have matched before but won't anymore. I believe this to be OK since floating point conversion is necessarily unpredicated in the LangRef instructions, which will cause the predicate to be all active. But we should check if it's possible to specify a predicate through the aarch64.sve.* intrinsics (or ACLE). Please can you and other reviewers consider for a moment if this matters. My initial pass on this says 'no' but more time or knowledge may indicate 'yes'. To fix the unsigned cases below (line 1705), they also need the `SVEAllActive` constraint on `$Pg` and the `_UNDEF` suffix added. peterwaller-arm: The change you're making is adding the condition that `$Pg` must be `(SVEAllActive)`. This has…

// Floating-point -> signed integer		// Signed integer -> Floating-point
peterwaller-armUnsubmitted Not Done Reply Inline Actions This comment appears to have been dropped. However, I note that it appears to be misleading/wrong. To me, it reads 'convert floating point to integer', but the conversion is taking a signed integer as input and producing a floating point. peterwaller-arm: This comment appears to have been dropped. However, I note that it appears to be…
def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),		def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg),
		bsmithUnsubmitted Not Done Reply Inline Actions I don't believe converting these patterns like this is ok, some of the intrinsics (such as `@llvm.aarch64.sve.scvtf`) can get lowered to these nodes without an all active predicate. With this patch I believe a case such as the below will either regress or fail to select. define <vscale x 8 x half> @scvtf_f16_i16(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) { %out = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> %a,<vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) ret <vscale x 8 x half> %out } declare <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x i16>) bsmith: I don't believe converting these patterns like this is ok, some of the intrinsics (such as…
		peterwaller-armUnsubmitted Not Done Reply Inline Actions These patterns exist to improve the code quality for the unpacked types (e.g. `<vscale x 2 x i16>`), and those are not expressible by the user (via the intrinsics). The predicate is all true by construction -- via the IR instructions -- so we have realised that the `_UNDEF` forms are what is wanted here, and these patterns are only needed for the `(SVEAllActive)` case. peterwaller-arm: These patterns exist to improve the code quality for the unpacked types (e.g. `<vscale x 2 x…
(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (nxv2f16 ZPR:$Zd))),		(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (nxv2f16 ZPR:$Zd))),
(SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;		(SCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;

def : Pat<(nxv4f16 (AArch64scvtf_mt (nxv4i1 PPR:$Pg),		def : Pat<(nxv4f16 (AArch64scvtf_mt (nxv4i1 (SVEAllActive):$Pg),
(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (nxv4f16 ZPR:$Zd))),		(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (nxv4f16 ZPR:$Zd))),
(SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;		(SCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;

def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),		def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg),
(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f16 ZPR:$Zd))),		(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f16 ZPR:$Zd))),
(SCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;		(SCVTF_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;

def : Pat<(nxv2f32 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),		def : Pat<(nxv2f32 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg),
(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f32 ZPR:$Zd))),		(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f32 ZPR:$Zd))),
(SCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;		(SCVTF_ZPmZ_StoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;

def : Pat<(nxv2f64 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),		def : Pat<(nxv2f64 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg),
(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f64 ZPR:$Zd))),		(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f64 ZPR:$Zd))),
(SCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;		(SCVTF_ZPmZ_StoD_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;

// Floating-point -> unsigned integer		// Unsigned integer -> Floating-point
def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),		def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg),
(and (nxv2i64 ZPR:$Zs),		(and (nxv2i64 ZPR:$Zs),
(nxv2i64 (AArch64dup (i64 0xFFFF)))), (nxv2f16 ZPR:$Zd))),		(nxv2i64 (AArch64dup (i64 0xFFFF)))), (nxv2f16 ZPR:$Zd))),
(UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;		(UCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;

def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),		def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg),
(and (nxv2i64 ZPR:$Zs),		(and (nxv2i64 ZPR:$Zs),
(nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f16 ZPR:$Zd))),		(nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f16 ZPR:$Zd))),
(UCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;		(UCVTF_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;

def : Pat<(nxv4f16 (AArch64ucvtf_mt (nxv4i1 PPR:$Pg),		def : Pat<(nxv4f16 (AArch64ucvtf_mt (nxv4i1 (SVEAllActive):$Pg),
(and (nxv4i32 ZPR:$Zs),		(and (nxv4i32 ZPR:$Zs),
(nxv4i32 (AArch64dup (i32 0xFFFF)))), (nxv4f16 ZPR:$Zd))),		(nxv4i32 (AArch64dup (i32 0xFFFF)))), (nxv4f16 ZPR:$Zd))),
(UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;		(UCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;

def : Pat<(nxv2f32 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),		def : Pat<(nxv2f32 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg),
(and (nxv2i64 ZPR:$Zs),		(and (nxv2i64 ZPR:$Zs),
(nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f32 ZPR:$Zd))),		(nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f32 ZPR:$Zd))),
(UCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;		(UCVTF_ZPmZ_StoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;

def : Pat<(nxv2f64 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),		def : Pat<(nxv2f64 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg),
(and (nxv2i64 ZPR:$Zs),		(and (nxv2i64 ZPR:$Zs),
(nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f64 ZPR:$Zd))),		(nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f64 ZPR:$Zd))),
(UCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;		(UCVTF_ZPmZ_StoD_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;

defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", AArch64frintn_mt>;		defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", AArch64frintn_mt>;
defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", AArch64frintp_mt>;		defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", AArch64frintp_mt>;
defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", AArch64frintm_mt>;		defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", AArch64frintm_mt>;
defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", AArch64frintz_mt>;		defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", AArch64frintz_mt>;
defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", AArch64frinta_mt>;		defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", AArch64frinta_mt>;
defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", AArch64frintx_mt>;		defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", AArch64frintx_mt>;
defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", AArch64frinti_mt>;		defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", AArch64frinti_mt>;
▲ Show 20 Lines • Show All 1,451 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/SVEInstrFormats.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 366 Lines • ▼ Show 20 Lines

// Used to match FP_ROUND_MERGE_PASSTHRU, which has an additional flag for the // Used to match FP_ROUND_MERGE_PASSTHRU, which has an additional flag for the

// type of rounding. This is matched by timm0_1 in pattern below and ignored. // type of rounding. This is matched by timm0_1 in pattern below and ignored.

class SVE_1_Op_Passthru_Round_Pat<ValueType vtd, SDPatternOperator op, ValueType pg, class SVE_1_Op_Passthru_Round_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,

ValueType vts, Instruction inst> ValueType vts, Instruction inst>

: Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), vtd:$Op3)), : Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), vtd:$Op3)),

(inst $Op3, $Op1, $Op2)>; (inst $Op3, $Op1, $Op2)>;

multiclass SVE_1_Op_PassthruUndef_Round_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,

ValueType vts, Instruction inst>{

def : Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), (vtd undef))),

(inst (IMPLICIT_DEF), $Op1, $Op2)>;

def : Pat<(vtd (op (pg (SVEAllActive:$Op1)), vts:$Op2, (i64 timm0_1), vtd:$Op3)),

(inst $Op3, $Op1, $Op2)>;

}

class SVE_1_Op_Imm_OptLsl_Reverse_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty, class SVE_1_Op_Imm_OptLsl_Reverse_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,

ValueType it, ComplexPattern cpx, Instruction inst> ValueType it, ComplexPattern cpx, Instruction inst>

: Pat<(vt (op (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))), (vt zprty:$Op1))), : Pat<(vt (op (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))), (vt zprty:$Op1))),

(inst $Op1, i32:$imm, i32:$shift)>; (inst $Op1, i32:$imm, i32:$shift)>;

class SVE_1_Op_Imm_OptLsl_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty, class SVE_1_Op_Imm_OptLsl_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,

ValueType it, ComplexPattern cpx, Instruction inst> ValueType it, ComplexPattern cpx, Instruction inst>

: Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))))), : Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))))),

▲ Show 20 Lines • Show All 2,190 Lines • ▼ Show 20 Lines

} }

multiclass sve_fp_2op_p_zd<bits<7> opc, string asm, multiclass sve_fp_2op_p_zd<bits<7> opc, string asm,

RegisterOperand i_zprtype, RegisterOperand i_zprtype,

RegisterOperand o_zprtype, RegisterOperand o_zprtype,

SDPatternOperator int_op, SDPatternOperator int_op,

SDPatternOperator ir_op, ValueType vt1, SDPatternOperator ir_op, ValueType vt1,

ValueType vt2, ValueType vt3, ElementSizeEnum Sz> { ValueType vt2, ValueType vt3, ElementSizeEnum Sz> {

def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>; def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>,

SVEPseudo2Instr<NAME, 1>;

// convert vt1 to a packed type for the intrinsic patterns // convert vt1 to a packed type for the intrinsic patterns

defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16, defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16,

!eq(!cast<string>(vt1), "nxv4f16"): nxv8f16, !eq(!cast<string>(vt1), "nxv4f16"): nxv8f16,

!eq(!cast<string>(vt1), "nxv2f32"): nxv4f32, !eq(!cast<string>(vt1), "nxv2f32"): nxv4f32,

1 : vt1); 1 : vt1);

peterwaller-armUnsubmitted

Not Done

An ask, arising from the name vtint_extend: please rename this to vtint_wide and imm => imm_mask_wide.

This is a narrowing rather than an extension. And the purpose of this type (and the related immediate imm) is to have a mask which selects only the desired bits from the input. Effectively, (as an example case), the patterns are producing a conversion of an nxv2i16 to an nxv2f16, but since there are only two of them, it's necessary to first put them in an nxv2f64 register. The extension operation translates from nxv2i16 to nxv2i64, for sign extension it's an sext, but for unsigned widening it masks out the higher bits with the and instruction. But since we're starting out with an nxv2i16 in the first place, and we know the result will only be used as an nxv2f16, we know those bits being masked won't be used and it can be dropped.

peterwaller-arm: An ask, arising from the name `vtint_extend`: please rename this to `vtint_wide` and `imm` =>…

// convert vt3 to a packed type for the intrinsic patterns // convert vt3 to a packed type for the intrinsic patterns

defvar packedvt3 = !cond(!eq(!cast<string>(vt3), "nxv2f16"): nxv8f16, defvar packedvt3 = !cond(!eq(!cast<string>(vt3), "nxv2f16"): nxv8f16,

!eq(!cast<string>(vt3), "nxv4f16"): nxv8f16, !eq(!cast<string>(vt3), "nxv4f16"): nxv8f16,

!eq(!cast<string>(vt3), "nxv2f32"): nxv4f32, !eq(!cast<string>(vt3), "nxv2f32"): nxv4f32,

1 : vt3); 1 : vt3);

def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, packedvt3, !cast<Instruction>(NAME)>; def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, packedvt3, !cast<Instruction>(NAME)>;

def _UNDEF : PredOneOpPassthruPseudo<NAME, !cast<ZPRRegOp>(i_zprtype)>;

def : SVE_1_Op_Passthru_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>; def : SVE_1_Op_Passthru_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;

defm : SVE_1_Op_PassthruUndef_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME # _UNDEF)>;

peterwaller-armUnsubmitted

Not Done

def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, packedvt3, !cast<Instruction>(NAME)>;

- def _UNDEF : PredOneOpPassthruPseudo<NAME, !cast<ZPRRegOp>(i_zprtype)>;

def : SVE_1_Op_Passthru_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;

+ def _UNDEF : PredOneOpPassthruPseudo<NAME, !cast<ZPRRegOp>(i_zprtype)>;

defm : SVE_1_Op_PassthruUndef_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME # _UNDEF)>;

}

multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm,

(see later comment for rationale)

peterwaller-arm: (see later comment for rationale)

} }

multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm, multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm,

RegisterOperand i_zprtype, RegisterOperand i_zprtype,

RegisterOperand o_zprtype, RegisterOperand o_zprtype,

SDPatternOperator int_op, SDPatternOperator int_op,

SDPatternOperator ir_op, ValueType vt1, SDPatternOperator ir_op, ValueType vt1,

ValueType vt2, ValueType vt3, ElementSizeEnum Sz> { ValueType vt2, ValueType vt3, ElementSizeEnum Sz> {

def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>; def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>,

SVEPseudo2Instr<NAME, 1>;

// convert vt1 to a packed type for the intrinsic patterns // convert vt1 to a packed type for the intrinsic patterns

defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16, defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16,

!eq(!cast<string>(vt1), "nxv4f16"): nxv8f16, !eq(!cast<string>(vt1), "nxv4f16"): nxv8f16,

!eq(!cast<string>(vt1), "nxv2f32"): nxv4f32, !eq(!cast<string>(vt1), "nxv2f32"): nxv4f32,

1 : vt1); 1 : vt1);

def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, vt3, !cast<Instruction>(NAME)>; def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, vt3, !cast<Instruction>(NAME)>;

def _UNDEF : PredOneOpPassthruPseudo<NAME, !cast<ZPRRegOp>(i_zprtype)>;

def : SVE_1_Op_Passthru_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>; def : SVE_1_Op_Passthru_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;

defm : SVE_1_Op_PassthruUndef_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME # _UNDEF)>;

peterwaller-armUnsubmitted

Not Done

def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, vt3, !cast<Instruction>(NAME)>;

- def _UNDEF : PredOneOpPassthruPseudo<NAME, !cast<ZPRRegOp>(i_zprtype)>;

def : SVE_1_Op_Passthru_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;

+ def _UNDEF : PredOneOpPassthruPseudo<NAME, !cast<ZPRRegOp>(i_zprtype)>;

defm : SVE_1_Op_PassthruUndef_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME # _UNDEF)>;

}

multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> {

Nit. Suggestion to reorder these. Typically we see multiclass { ... instructions ... ; ... patterns ... } or multiclass { ... instruction1 ... patterns1 ...; ... instructions2 ...; ... patterns2 ...; } but here they are interleaved (ins1, pat1, ins2, pat1, pat2). This could be subtly confusing, if someone is expecting the SVE_1_Op_Passthru_Round_Pat to correspond to the _UNDEF definition immediately above. More linebreaks also help here.

peterwaller-arm: Nit. Suggestion to reorder these. Typically we see `multiclass { ... instructions ... ; ...

} }

multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> { multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> {

def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>, def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>,

SVEPseudo2Instr<NAME # _H, 1>; SVEPseudo2Instr<NAME # _H, 1>;

def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>, def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>,

SVEPseudo2Instr<NAME # _S, 1>; SVEPseudo2Instr<NAME # _S, 1>;

def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>, def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>,

▲ Show 20 Lines • Show All 5,865 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-fcvt.ll

	Show First 20 Lines • Show All 892 Lines • ▼ Show 20 Lines
	; CHECK-LABEL: ucvtf_d_nxv2i64:			; CHECK-LABEL: ucvtf_d_nxv2i64:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.d			; CHECK-NEXT: ptrue p0.d
	; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d			; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = uitofp <vscale x 2 x i64> %a to <vscale x 2 x double>			%res = uitofp <vscale x 2 x i64> %a to <vscale x 2 x double>
	ret <vscale x 2 x double> %res			ret <vscale x 2 x double> %res
	}			}

				; fpext of %b from <vscale x 4 x half> to <vscale x 4 x float>; %a is unused.
				; Expected code: ptrue p0.s, then movprfx z0, z1 ahead of the predicated
				; fcvt from .h to .s elements.
				define <vscale x 4 x float> @fcvt_htos_movprfx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
				; CHECK-LABEL: fcvt_htos_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: fcvt z0.s, p0/m, z1.h
				; CHECK-NEXT: ret
				%res = fpext <vscale x 4 x half> %b to <vscale x 4 x float>
				ret <vscale x 4 x float> %res
				}

				; fpext of %b from <vscale x 2 x half> to <vscale x 2 x double>; %a is unused.
				; Expected code: ptrue p0.d, then movprfx z0, z1 ahead of the predicated
				; fcvt from .h to .d elements.
				define <vscale x 2 x double> @fcvt_htod_movprfx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
				; CHECK-LABEL: fcvt_htod_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: fcvt z0.d, p0/m, z1.h
				; CHECK-NEXT: ret
				%res = fpext <vscale x 2 x half> %b to <vscale x 2 x double>
				ret <vscale x 2 x double> %res
				}

				; fpext of %b from <vscale x 2 x float> to <vscale x 2 x double>; %a is unused.
				; Expected code: ptrue p0.d, then movprfx z0, z1 ahead of the predicated
				; fcvt from .s to .d elements.
				define <vscale x 2 x double> @fcvt_stod_movprfx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
				; CHECK-LABEL: fcvt_stod_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: fcvt z0.d, p0/m, z1.s
				; CHECK-NEXT: ret
				%res = fpext <vscale x 2 x float> %b to <vscale x 2 x double>
				ret <vscale x 2 x double> %res
				}

				; fptrunc of %b from <vscale x 4 x float> to <vscale x 4 x half>; %a is unused.
				; Expected code: ptrue p0.s, then movprfx z0, z1 ahead of the predicated
				; fcvt from .s to .h elements.
				define <vscale x 4 x half> @fcvt_stoh_movprfx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
				; CHECK-LABEL: fcvt_stoh_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: fcvt z0.h, p0/m, z1.s
				; CHECK-NEXT: ret
				%res = fptrunc <vscale x 4 x float> %b to <vscale x 4 x half>
				ret <vscale x 4 x half> %res
				}

				; fptrunc of %b from <vscale x 2 x double> to <vscale x 2 x half>; %a is unused.
				; Expected code: ptrue p0.d, then movprfx z0, z1 ahead of the predicated
				; fcvt from .d to .h elements.
				define <vscale x 2 x half> @fcvt_dtoh_movprfx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
				; CHECK-LABEL: fcvt_dtoh_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: fcvt z0.h, p0/m, z1.d
				; CHECK-NEXT: ret
				%res = fptrunc <vscale x 2 x double> %b to <vscale x 2 x half>
				ret <vscale x 2 x half> %res
				}

				; fptrunc of %b from <vscale x 2 x double> to <vscale x 2 x float>; %a is unused.
				; Expected code: ptrue p0.d, then movprfx z0, z1 ahead of the predicated
				; fcvt from .d to .s elements.
				define <vscale x 2 x float> @fcvt_dtos_movprfx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
				; CHECK-LABEL: fcvt_dtos_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: fcvt z0.s, p0/m, z1.d
				; CHECK-NEXT: ret
				%res = fptrunc <vscale x 2 x double> %b to <vscale x 2 x float>
				ret <vscale x 2 x float> %res
				}

				; sitofp of %b from <vscale x 8 x i16> to <vscale x 8 x half>; %a is unused.
				; Expected code: ptrue p0.h, then movprfx z0, z1 ahead of the predicated
				; scvtf on .h elements.
				define <vscale x 8 x half> @scvtf_htoh_movprfx(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
				; CHECK-LABEL: scvtf_htoh_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: scvtf z0.h, p0/m, z1.h
				; CHECK-NEXT: ret
				%res = sitofp <vscale x 8 x i16> %b to <vscale x 8 x half>
				ret <vscale x 8 x half> %res
				}

				; sitofp of %b from <vscale x 4 x i32> to <vscale x 4 x float>; %a is unused.
				; Expected code: ptrue p0.s, then movprfx z0, z1 ahead of the predicated
				; scvtf on .s elements.
				define <vscale x 4 x float> @scvtf_stos_movprfx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
				; CHECK-LABEL: scvtf_stos_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: scvtf z0.s, p0/m, z1.s
				; CHECK-NEXT: ret
				%res = sitofp <vscale x 4 x i32> %b to <vscale x 4 x float>
				ret <vscale x 4 x float> %res
				}

				; sitofp of %b from <vscale x 2 x i32> to <vscale x 2 x double>; %a is unused.
				; Expected code: ptrue p0.d, then movprfx z0, z1 ahead of the predicated
				; scvtf from .s to .d elements, with no separate extension instruction.
				define <vscale x 2 x double> @scvtf_stod_movprfx(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
				; CHECK-LABEL: scvtf_stod_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: scvtf z0.d, p0/m, z1.s
				; CHECK-NEXT: ret
				%res = sitofp <vscale x 2 x i32> %b to <vscale x 2 x double>
				ret <vscale x 2 x double> %res
				}

				; sitofp of %b from <vscale x 2 x i64> to <vscale x 2 x float>; %a is unused.
				; Expected code: ptrue p0.d, then movprfx z0, z1 ahead of the predicated
				; scvtf from .d to .s elements.
				define <vscale x 2 x float> @scvtf_dtos_movprfx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: scvtf_dtos_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: scvtf z0.s, p0/m, z1.d
				; CHECK-NEXT: ret
				%res = sitofp <vscale x 2 x i64> %b to <vscale x 2 x float>
				ret <vscale x 2 x float> %res
				}

				; sitofp of %b from <vscale x 4 x i32> to <vscale x 4 x half>; %a is unused.
				; Expected code: ptrue p0.s, then movprfx z0, z1 ahead of the predicated
				; scvtf from .s to .h elements.
				define <vscale x 4 x half> @scvtf_stoh_movprfx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
				; CHECK-LABEL: scvtf_stoh_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: scvtf z0.h, p0/m, z1.s
				; CHECK-NEXT: ret
				%res = sitofp <vscale x 4 x i32> %b to <vscale x 4 x half>
				ret <vscale x 4 x half> %res
				}

				; sitofp of %b from <vscale x 2 x i64> to <vscale x 2 x half>; %a is unused.
				; Expected code: ptrue p0.d, then movprfx z0, z1 ahead of the predicated
				; scvtf from .d to .h elements.
				define <vscale x 2 x half> @scvtf_dtoh_movprfx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: scvtf_dtoh_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: scvtf z0.h, p0/m, z1.d
				; CHECK-NEXT: ret
				%res = sitofp <vscale x 2 x i64> %b to <vscale x 2 x half>
				ret <vscale x 2 x half> %res
				}

				; sitofp of %b from <vscale x 2 x i64> to <vscale x 2 x double>; %a is unused.
				; Expected code: ptrue p0.d, then movprfx z0, z1 ahead of the predicated
				; scvtf on .d elements.
				define <vscale x 2 x double> @scvtf_dtod_movprfx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: scvtf_dtod_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: scvtf z0.d, p0/m, z1.d
				; CHECK-NEXT: ret
				%res = sitofp <vscale x 2 x i64> %b to <vscale x 2 x double>
				ret <vscale x 2 x double> %res
				}

				; uitofp of %b from <vscale x 4 x i32> to <vscale x 4 x float>; %a is unused.
				; Expected code: ptrue p0.s, then movprfx z0, z1 ahead of the predicated
				; ucvtf on .s elements.
				define <vscale x 4 x float> @ucvtf_stos_movprfx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
				; CHECK-LABEL: ucvtf_stos_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: ucvtf z0.s, p0/m, z1.s
				; CHECK-NEXT: ret
				%res = uitofp <vscale x 4 x i32> %b to <vscale x 4 x float>
				ret <vscale x 4 x float> %res
				}

				; uitofp of %b from <vscale x 8 x i16> to <vscale x 8 x half>; %a is unused.
				; Expected code: ptrue p0.h, then movprfx z0, z1 ahead of the predicated
				; ucvtf on .h elements.
				define <vscale x 8 x half> @ucvtf_htoh_movprfx(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
				; CHECK-LABEL: ucvtf_htoh_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: ucvtf z0.h, p0/m, z1.h
				; CHECK-NEXT: ret
				%res = uitofp <vscale x 8 x i16> %b to <vscale x 8 x half>
				ret <vscale x 8 x half> %res
				}

				; uitofp of %b from <vscale x 2 x i32> to <vscale x 2 x double>; %a is unused.
				; Expected code: ptrue p0.d, then movprfx z0, z1 ahead of the predicated
				; ucvtf from .s to .d elements, with no separate masking instruction.
				define <vscale x 2 x double> @ucvtf_stod_movprfx(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
				; CHECK-LABEL: ucvtf_stod_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: ucvtf z0.d, p0/m, z1.s
				; CHECK-NEXT: ret
				%res = uitofp <vscale x 2 x i32> %b to <vscale x 2 x double>
				ret <vscale x 2 x double> %res
				}

				; uitofp of %b from <vscale x 4 x i32> to <vscale x 4 x half>; %a is unused.
				; Expected code: ptrue p0.s, then movprfx z0, z1 ahead of the predicated
				; ucvtf from .s to .h elements.
				define <vscale x 4 x half> @ucvtf_stoh_movprfx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
				; CHECK-LABEL: ucvtf_stoh_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: ucvtf z0.h, p0/m, z1.s
				; CHECK-NEXT: ret
				%res = uitofp <vscale x 4 x i32> %b to <vscale x 4 x half>
				ret <vscale x 4 x half> %res
				}

				; uitofp of %b from <vscale x 2 x i64> to <vscale x 2 x float>; %a is unused.
				; Expected code: ptrue p0.d, then movprfx z0, z1 ahead of the predicated
				; ucvtf from .d to .s elements.
				define <vscale x 2 x float> @ucvtf_dtos_movprfx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: ucvtf_dtos_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: ucvtf z0.s, p0/m, z1.d
				; CHECK-NEXT: ret
				%res = uitofp <vscale x 2 x i64> %b to <vscale x 2 x float>
				ret <vscale x 2 x float> %res
				}

				; uitofp of %b from <vscale x 2 x i64> to <vscale x 2 x half>; %a is unused.
				; Expected code: ptrue p0.d, then movprfx z0, z1 ahead of the predicated
				; ucvtf from .d to .h elements.
				define <vscale x 2 x half> @ucvtf_dtoh_movprfx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: ucvtf_dtoh_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: ucvtf z0.h, p0/m, z1.d
				; CHECK-NEXT: ret
				%res = uitofp <vscale x 2 x i64> %b to <vscale x 2 x half>
				ret <vscale x 2 x half> %res
				}

				; uitofp of %b from <vscale x 2 x i64> to <vscale x 2 x double>; %a is unused.
				; Expected code: ptrue p0.d, then movprfx z0, z1 ahead of the predicated
				; ucvtf on .d elements.
				define <vscale x 2 x double> @ucvtf_dtod_movprfx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: ucvtf_dtod_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: ucvtf z0.d, p0/m, z1.d
				; CHECK-NEXT: ret
				%res = uitofp <vscale x 2 x i64> %b to <vscale x 2 x double>
				ret <vscale x 2 x double> %res
				}

				; fptosi of %b from <vscale x 8 x half> to <vscale x 8 x i16>; %a is unused.
				; Expected code: ptrue p0.h, then movprfx z0, z1 ahead of the predicated
				; fcvtzs on .h elements.
				define <vscale x 8 x i16> @fcvtzs_htoh_movprfx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
				; CHECK-LABEL: fcvtzs_htoh_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: fcvtzs z0.h, p0/m, z1.h
				; CHECK-NEXT: ret
				%res = fptosi <vscale x 8 x half> %b to <vscale x 8 x i16>
				ret <vscale x 8 x i16> %res
				}

				; fptosi of %b from <vscale x 4 x float> to <vscale x 4 x i32>; %a is unused.
				; Expected code: ptrue p0.s, then movprfx z0, z1 ahead of the predicated
				; fcvtzs on .s elements.
				define <vscale x 4 x i32> @fcvtzs_stos_movprfx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
				; CHECK-LABEL: fcvtzs_stos_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: fcvtzs z0.s, p0/m, z1.s
				; CHECK-NEXT: ret
				%res = fptosi <vscale x 4 x float> %b to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				; fptosi of %b from <vscale x 2 x double> to <vscale x 2 x i32>; %a is unused.
				; Expected code: ptrue p0.d, then movprfx z0, z1 ahead of the predicated
				; fcvtzs. Note the expected instruction writes .d elements even though the
				; IR result type is <vscale x 2 x i32>.
				define <vscale x 2 x i32> @fcvtzs_dtos_movprfx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
				; CHECK-LABEL: fcvtzs_dtos_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.d
				; CHECK-NEXT: ret
				%res = fptosi <vscale x 2 x double> %b to <vscale x 2 x i32>
				ret <vscale x 2 x i32> %res
				}

				; fptosi of %b from <vscale x 2 x float> to <vscale x 2 x i64>; %a is unused.
				; Expected code: ptrue p0.d, then movprfx z0, z1 ahead of the predicated
				; fcvtzs from .s to .d elements.
				define <vscale x 2 x i64> @fcvtzs_stod_movprfx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
				; CHECK-LABEL: fcvtzs_stod_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.s
				; CHECK-NEXT: ret
				%res = fptosi <vscale x 2 x float> %b to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; fptosi of %b from <vscale x 4 x half> to <vscale x 4 x i32>; %a is unused.
				; Expected code: ptrue p0.s, then movprfx z0, z1 ahead of the predicated
				; fcvtzs from .h to .s elements.
				define <vscale x 4 x i32> @fcvtzs_htos_movprfx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
				; CHECK-LABEL: fcvtzs_htos_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: fcvtzs z0.s, p0/m, z1.h
				; CHECK-NEXT: ret
				%res = fptosi <vscale x 4 x half> %b to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				; fptosi of the second argument %b (nxv2f16 -> nxv2i64); %a is unused.
				; The checks expect movprfx z0, z1 ahead of the predicated (p0/m) fcvtzs
				; that reads .h elements and writes .d elements.
				define <vscale x 2 x i64> @fcvtzs_htod_movprfx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
				; CHECK-LABEL: fcvtzs_htod_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.h
				; CHECK-NEXT: ret
				%res = fptosi <vscale x 2 x half> %b to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; fptosi of the second argument %b (nxv2f64 -> nxv2i64); %a is unused.
				; The checks expect movprfx z0, z1 ahead of the predicated (p0/m) fcvtzs
				; operating on .d elements.
				define <vscale x 2 x i64> @fcvtzs_dtod_movprfx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
				; CHECK-LABEL: fcvtzs_dtod_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.d
				; CHECK-NEXT: ret
				%res = fptosi <vscale x 2 x double> %b to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; fptoui of the second argument %b (nxv8f16 -> nxv8i16); %a is unused.
				; The checks expect movprfx z0, z1 ahead of the predicated (p0/m) fcvtzu
				; operating on .h elements.
				define <vscale x 8 x i16> @fcvtzu_htoh_movprfx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
				; CHECK-LABEL: fcvtzu_htoh_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: fcvtzu z0.h, p0/m, z1.h
				; CHECK-NEXT: ret
				%res = fptoui <vscale x 8 x half> %b to <vscale x 8 x i16>
				ret <vscale x 8 x i16> %res
				}

				; fptoui of the second argument %b (nxv4f32 -> nxv4i32); %a is unused.
				; The checks expect movprfx z0, z1 ahead of the predicated (p0/m) fcvtzu
				; operating on .s elements.
				define <vscale x 4 x i32> @fcvtzu_stos_movprfx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
				; CHECK-LABEL: fcvtzu_stos_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: fcvtzu z0.s, p0/m, z1.s
				; CHECK-NEXT: ret
				%res = fptoui <vscale x 4 x float> %b to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				; fptoui of the second argument %b (nxv2f64 -> nxv2i32); %a is unused.
				; The checks expect movprfx z0, z1 ahead of a predicated (p0/m) convert on
				; .d elements.
				; NOTE(review): the expected instruction is the signed fcvtzs although the IR
				; uses fptoui -- presumably a consequence of promoting the nxv2i32 result
				; during legalization; confirm this is the intended output and not a
				; copy/paste slip from fcvtzs_dtos_movprfx.
				define <vscale x 2 x i32> @fcvtzu_dtos_movprfx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
				; CHECK-LABEL: fcvtzu_dtos_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.d
				; CHECK-NEXT: ret
				%res = fptoui <vscale x 2 x double> %b to <vscale x 2 x i32>
				ret <vscale x 2 x i32> %res
				}

				; fptoui of the second argument %b (nxv2f32 -> nxv2i64); %a is unused.
				; The checks expect movprfx z0, z1 ahead of the predicated (p0/m) fcvtzu
				; that reads .s elements and writes .d elements.
				define <vscale x 2 x i64> @fcvtzu_stod_movprfx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
				; CHECK-LABEL: fcvtzu_stod_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.s
				; CHECK-NEXT: ret
				%res = fptoui <vscale x 2 x float> %b to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; fptoui of the second argument %b (nxv4f16 -> nxv4i32); %a is unused.
				; The checks expect movprfx z0, z1 ahead of the predicated (p0/m) fcvtzu
				; that reads .h elements and writes .s elements.
				define <vscale x 4 x i32> @fcvtzu_htos_movprfx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
				; CHECK-LABEL: fcvtzu_htos_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: fcvtzu z0.s, p0/m, z1.h
				; CHECK-NEXT: ret
				%res = fptoui <vscale x 4 x half> %b to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				; fptoui of the second argument %b (nxv2f16 -> nxv2i64); %a is unused.
				; The checks expect movprfx z0, z1 ahead of the predicated (p0/m) fcvtzu
				; that reads .h elements and writes .d elements.
				define <vscale x 2 x i64> @fcvtzu_htod_movprfx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
				; CHECK-LABEL: fcvtzu_htod_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.h
				; CHECK-NEXT: ret
				%res = fptoui <vscale x 2 x half> %b to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; fptoui of the second argument %b (nxv2f64 -> nxv2i64); %a is unused.
				; The checks expect movprfx z0, z1 ahead of the predicated (p0/m) fcvtzu
				; operating on .d elements.
				define <vscale x 2 x i64> @fcvtzu_dtod_movprfx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
				; CHECK-LABEL: fcvtzu_dtod_movprfx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: movprfx z0, z1
				; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.d
				; CHECK-NEXT: ret
				%res = fptoui <vscale x 2 x double> %b to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}
				peterwaller-arm [Unsubmitted, Not Done]: It should be possible to exercise the relevant pattern without an explicit IR `and`, by analogy with `ucvtf_stod_movprfx`. The patterns of interest (involving and/sxtw, etc.) exist to produce better code in the presence of unpacked types.

llvm/test/CodeGen/AArch64/sve-fpext-load.ll

	Show All 17 Lines
	define <vscale x 4 x double> @ext4_f16_f64(<vscale x 4 x half> *%ptr, i64 %index) {			define <vscale x 4 x double> @ext4_f16_f64(<vscale x 4 x half> *%ptr, i64 %index) {
	; CHECK-LABEL: ext4_f16_f64:			; CHECK-LABEL: ext4_f16_f64:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.s			; CHECK-NEXT: ptrue p0.s
	; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]			; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
	; CHECK-NEXT: ptrue p0.d			; CHECK-NEXT: ptrue p0.d
	; CHECK-NEXT: uunpklo z1.d, z0.s			; CHECK-NEXT: uunpklo z1.d, z0.s
	; CHECK-NEXT: uunpkhi z2.d, z0.s			; CHECK-NEXT: uunpkhi z2.d, z0.s
				; CHECK-NEXT: movprfx z0, z1
	; CHECK-NEXT: fcvt z0.d, p0/m, z1.h			; CHECK-NEXT: fcvt z0.d, p0/m, z1.h
				; CHECK-NEXT: movprfx z1, z2
	; CHECK-NEXT: fcvt z1.d, p0/m, z2.h			; CHECK-NEXT: fcvt z1.d, p0/m, z2.h
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%load = load <vscale x 4 x half>, <vscale x 4 x half>* %ptr, align 4			%load = load <vscale x 4 x half>, <vscale x 4 x half>* %ptr, align 4
	%load.ext = fpext <vscale x 4 x half> %load to <vscale x 4 x double>			%load.ext = fpext <vscale x 4 x half> %load to <vscale x 4 x double>
	ret <vscale x 4 x double> %load.ext			ret <vscale x 4 x double> %load.ext
	}			}

	; fpext <vscale x 8 x half> -> <vscale x 8 x double>			; fpext <vscale x 8 x half> -> <vscale x 8 x double>
	define <vscale x 8 x double> @ext8_f16_f64(<vscale x 8 x half> *%ptr, i64 %index) {			define <vscale x 8 x double> @ext8_f16_f64(<vscale x 8 x half> *%ptr, i64 %index) {
	; CHECK-LABEL: ext8_f16_f64:			; CHECK-LABEL: ext8_f16_f64:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.h			; CHECK-NEXT: ptrue p0.h
	; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]			; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
	; CHECK-NEXT: ptrue p0.d			; CHECK-NEXT: ptrue p0.d
	; CHECK-NEXT: uunpklo z1.s, z0.h			; CHECK-NEXT: uunpklo z1.s, z0.h
	; CHECK-NEXT: uunpkhi z0.s, z0.h			; CHECK-NEXT: uunpkhi z0.s, z0.h
	; CHECK-NEXT: uunpklo z2.d, z1.s			; CHECK-NEXT: uunpklo z2.d, z1.s
	; CHECK-NEXT: uunpkhi z1.d, z1.s			; CHECK-NEXT: uunpkhi z1.d, z1.s
	; CHECK-NEXT: uunpklo z3.d, z0.s			; CHECK-NEXT: uunpklo z3.d, z0.s
				; CHECK-NEXT: fcvt z1.d, p0/m, z1.h
	; CHECK-NEXT: uunpkhi z4.d, z0.s			; CHECK-NEXT: uunpkhi z4.d, z0.s
				; CHECK-NEXT: movprfx z0, z2
				sdesmalen [Unsubmitted, Not Done]: I see how this can be an improvement, but it's also a bit of a hammer. I would at least have expected a check somewhere to ask if a MOVPRFX is free/cheap; is it worth adding that?
				bsmith [Unsubmitted, Not Done]: I think one key thing to note here is that this patch is just bringing these floating point converts in line with all of the other unary operations, for which this change was done some time ago. For now I think it's probably best to get this patch in and then later, if we deem it necessary, add machinery to only do this when appropriate for all appropriate operations, not just this one.
				sdesmalen [Unsubmitted, Not Done]: Thanks for the clarification. Yes, I'm happy with the machinery being added for all operations in a separate patch.
	; CHECK-NEXT: fcvt z0.d, p0/m, z2.h			; CHECK-NEXT: fcvt z0.d, p0/m, z2.h
	; CHECK-NEXT: fcvt z1.d, p0/m, z1.h			; CHECK-NEXT: movprfx z2, z3
	; CHECK-NEXT: fcvt z2.d, p0/m, z3.h			; CHECK-NEXT: fcvt z2.d, p0/m, z3.h
				; CHECK-NEXT: movprfx z3, z4
	; CHECK-NEXT: fcvt z3.d, p0/m, z4.h			; CHECK-NEXT: fcvt z3.d, p0/m, z4.h
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%load = load <vscale x 8 x half>, <vscale x 8 x half>* %ptr, align 4			%load = load <vscale x 8 x half>, <vscale x 8 x half>* %ptr, align 4
	%load.ext = fpext <vscale x 8 x half> %load to <vscale x 8 x double>			%load.ext = fpext <vscale x 8 x half> %load to <vscale x 8 x double>
	ret <vscale x 8 x double> %load.ext			ret <vscale x 8 x double> %load.ext
	}			}

	; fpext <vscale x 2 x float> -> <vscale x 2 x double>			; fpext <vscale x 2 x float> -> <vscale x 2 x double>
	Show All 13 Lines
	define <vscale x 4 x double> @ext4_f32_f64(<vscale x 4 x float> *%ptr, i64 %index) {			define <vscale x 4 x double> @ext4_f32_f64(<vscale x 4 x float> *%ptr, i64 %index) {
	; CHECK-LABEL: ext4_f32_f64:			; CHECK-LABEL: ext4_f32_f64:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.s			; CHECK-NEXT: ptrue p0.s
	; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]			; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
	; CHECK-NEXT: ptrue p0.d			; CHECK-NEXT: ptrue p0.d
	; CHECK-NEXT: uunpklo z1.d, z0.s			; CHECK-NEXT: uunpklo z1.d, z0.s
	; CHECK-NEXT: uunpkhi z2.d, z0.s			; CHECK-NEXT: uunpkhi z2.d, z0.s
				; CHECK-NEXT: movprfx z0, z1
	; CHECK-NEXT: fcvt z0.d, p0/m, z1.s			; CHECK-NEXT: fcvt z0.d, p0/m, z1.s
				; CHECK-NEXT: movprfx z1, z2
	; CHECK-NEXT: fcvt z1.d, p0/m, z2.s			; CHECK-NEXT: fcvt z1.d, p0/m, z2.s
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%load = load <vscale x 4 x float>, <vscale x 4 x float>* %ptr, align 4			%load = load <vscale x 4 x float>, <vscale x 4 x float>* %ptr, align 4
	%load.ext = fpext <vscale x 4 x float> %load to <vscale x 4 x double>			%load.ext = fpext <vscale x 4 x float> %load to <vscale x 4 x double>
	ret <vscale x 4 x double> %load.ext			ret <vscale x 4 x double> %load.ext
	}			}

llvm/test/CodeGen/AArch64/sve-split-fcvt.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s \| FileCheck %s		; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s \| FileCheck %s

; FP_EXTEND		; FP_EXTEND

define <vscale x 8 x float> @fcvts_nxv8f16(<vscale x 8 x half> %a) {		define <vscale x 8 x float> @fcvts_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: fcvts_nxv8f16:		; CHECK-LABEL: fcvts_nxv8f16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uunpklo z1.s, z0.h		; CHECK-NEXT: uunpklo z1.s, z0.h
		; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uunpkhi z2.s, z0.h		; CHECK-NEXT: uunpkhi z2.s, z0.h
		; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: fcvt z0.s, p0/m, z1.h		; CHECK-NEXT: fcvt z0.s, p0/m, z1.h
		; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: fcvt z1.s, p0/m, z2.h		; CHECK-NEXT: fcvt z1.s, p0/m, z2.h
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = fpext <vscale x 8 x half> %a to <vscale x 8 x float>		%res = fpext <vscale x 8 x half> %a to <vscale x 8 x float>
ret <vscale x 8 x float> %res		ret <vscale x 8 x float> %res
}		}

define <vscale x 4 x double> @fcvtd_nxv4f16(<vscale x 4 x half> %a) {		define <vscale x 4 x double> @fcvtd_nxv4f16(<vscale x 4 x half> %a) {
; CHECK-LABEL: fcvtd_nxv4f16:		; CHECK-LABEL: fcvtd_nxv4f16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpklo z1.d, z0.s		; CHECK-NEXT: uunpklo z1.d, z0.s
		; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpkhi z2.d, z0.s		; CHECK-NEXT: uunpkhi z2.d, z0.s
		; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: fcvt z0.d, p0/m, z1.h		; CHECK-NEXT: fcvt z0.d, p0/m, z1.h
		; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: fcvt z1.d, p0/m, z2.h		; CHECK-NEXT: fcvt z1.d, p0/m, z2.h
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = fpext <vscale x 4 x half> %a to <vscale x 4 x double>		%res = fpext <vscale x 4 x half> %a to <vscale x 4 x double>
ret <vscale x 4 x double> %res		ret <vscale x 4 x double> %res
}		}

define <vscale x 8 x double> @fcvtd_nxv8f16(<vscale x 8 x half> %a) {		define <vscale x 8 x double> @fcvtd_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: fcvtd_nxv8f16:		; CHECK-LABEL: fcvtd_nxv8f16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z1.s, z0.h		; CHECK-NEXT: uunpklo z1.s, z0.h
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: ptrue p0.d		; CHECK-NEXT: ptrue p0.d
		; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uunpklo z2.d, z1.s		; CHECK-NEXT: uunpklo z2.d, z1.s
; CHECK-NEXT: uunpkhi z1.d, z1.s		; CHECK-NEXT: uunpkhi z1.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z0.s		; CHECK-NEXT: uunpklo z3.d, z0.s
		; CHECK-NEXT: fcvt z1.d, p0/m, z1.h
; CHECK-NEXT: uunpkhi z4.d, z0.s		; CHECK-NEXT: uunpkhi z4.d, z0.s
		; CHECK-NEXT: movprfx z0, z2
; CHECK-NEXT: fcvt z0.d, p0/m, z2.h		; CHECK-NEXT: fcvt z0.d, p0/m, z2.h
; CHECK-NEXT: fcvt z1.d, p0/m, z1.h		; CHECK-NEXT: movprfx z2, z3
; CHECK-NEXT: fcvt z2.d, p0/m, z3.h		; CHECK-NEXT: fcvt z2.d, p0/m, z3.h
		; CHECK-NEXT: movprfx z3, z4
; CHECK-NEXT: fcvt z3.d, p0/m, z4.h		; CHECK-NEXT: fcvt z3.d, p0/m, z4.h
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = fpext <vscale x 8 x half> %a to <vscale x 8 x double>		%res = fpext <vscale x 8 x half> %a to <vscale x 8 x double>
ret <vscale x 8 x double> %res		ret <vscale x 8 x double> %res
}		}

define <vscale x 4 x double> @fcvtd_nxv4f32(<vscale x 4 x float> %a) {		define <vscale x 4 x double> @fcvtd_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fcvtd_nxv4f32:		; CHECK-LABEL: fcvtd_nxv4f32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpklo z1.d, z0.s		; CHECK-NEXT: uunpklo z1.d, z0.s
		; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpkhi z2.d, z0.s		; CHECK-NEXT: uunpkhi z2.d, z0.s
		; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: fcvt z0.d, p0/m, z1.s		; CHECK-NEXT: fcvt z0.d, p0/m, z1.s
		; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: fcvt z1.d, p0/m, z2.s		; CHECK-NEXT: fcvt z1.d, p0/m, z2.s
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = fpext <vscale x 4 x float> %a to <vscale x 4 x double>		%res = fpext <vscale x 4 x float> %a to <vscale x 4 x double>
ret <vscale x 4 x double> %res		ret <vscale x 4 x double> %res
}		}

define <vscale x 8 x double> @fcvtd_nxv8f32(<vscale x 8 x float> %a) {		define <vscale x 8 x double> @fcvtd_nxv8f32(<vscale x 8 x float> %a) {
; CHECK-LABEL: fcvtd_nxv8f32:		; CHECK-LABEL: fcvtd_nxv8f32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpklo z2.d, z0.s		; CHECK-NEXT: uunpklo z2.d, z0.s
; CHECK-NEXT: uunpkhi z3.d, z0.s		; CHECK-NEXT: uunpkhi z3.d, z0.s
		; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpklo z4.d, z1.s		; CHECK-NEXT: uunpklo z4.d, z1.s
; CHECK-NEXT: uunpkhi z5.d, z1.s		; CHECK-NEXT: uunpkhi z5.d, z1.s
		; CHECK-NEXT: movprfx z0, z2
; CHECK-NEXT: fcvt z0.d, p0/m, z2.s		; CHECK-NEXT: fcvt z0.d, p0/m, z2.s
		; CHECK-NEXT: movprfx z1, z3
; CHECK-NEXT: fcvt z1.d, p0/m, z3.s		; CHECK-NEXT: fcvt z1.d, p0/m, z3.s
		; CHECK-NEXT: movprfx z2, z4
; CHECK-NEXT: fcvt z2.d, p0/m, z4.s		; CHECK-NEXT: fcvt z2.d, p0/m, z4.s
		; CHECK-NEXT: movprfx z3, z5
; CHECK-NEXT: fcvt z3.d, p0/m, z5.s		; CHECK-NEXT: fcvt z3.d, p0/m, z5.s
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = fpext <vscale x 8 x float> %a to <vscale x 8 x double>		%res = fpext <vscale x 8 x float> %a to <vscale x 8 x double>
ret <vscale x 8 x double> %res		ret <vscale x 8 x double> %res
}		}

; FP_ROUND		; FP_ROUND

▲ Show 20 Lines • Show All 94 Lines • ▼ Show 20 Lines	; CHECK-NEXT: ret
%res = fptosi <vscale x 8 x double> %a to <vscale x 8 x i16>		%res = fptosi <vscale x 8 x double> %a to <vscale x 8 x i16>
ret <vscale x 8 x i16> %res		ret <vscale x 8 x i16> %res
}		}

; Split result		; Split result
define <vscale x 4 x i64> @fcvtzs_d_nxv4f32(<vscale x 4 x float> %a) {		define <vscale x 4 x i64> @fcvtzs_d_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fcvtzs_d_nxv4f32:		; CHECK-LABEL: fcvtzs_d_nxv4f32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpklo z1.d, z0.s		; CHECK-NEXT: uunpklo z1.d, z0.s
		; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpkhi z2.d, z0.s		; CHECK-NEXT: uunpkhi z2.d, z0.s
		; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.s		; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.s
		; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.s		; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.s
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = fptosi <vscale x 4 x float> %a to <vscale x 4 x i64>		%res = fptosi <vscale x 4 x float> %a to <vscale x 4 x i64>
ret <vscale x 4 x i64> %res		ret <vscale x 4 x i64> %res
}		}

define <vscale x 16 x i32> @fcvtzs_s_nxv16f16(<vscale x 16 x half> %a) {		define <vscale x 16 x i32> @fcvtzs_s_nxv16f16(<vscale x 16 x half> %a) {
; CHECK-LABEL: fcvtzs_s_nxv16f16:		; CHECK-LABEL: fcvtzs_s_nxv16f16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uunpklo z2.s, z0.h		; CHECK-NEXT: uunpklo z2.s, z0.h
; CHECK-NEXT: uunpkhi z3.s, z0.h		; CHECK-NEXT: uunpkhi z3.s, z0.h
		; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uunpklo z4.s, z1.h		; CHECK-NEXT: uunpklo z4.s, z1.h
; CHECK-NEXT: uunpkhi z5.s, z1.h		; CHECK-NEXT: uunpkhi z5.s, z1.h
		; CHECK-NEXT: movprfx z0, z2
; CHECK-NEXT: fcvtzs z0.s, p0/m, z2.h		; CHECK-NEXT: fcvtzs z0.s, p0/m, z2.h
		; CHECK-NEXT: movprfx z1, z3
; CHECK-NEXT: fcvtzs z1.s, p0/m, z3.h		; CHECK-NEXT: fcvtzs z1.s, p0/m, z3.h
		; CHECK-NEXT: movprfx z2, z4
; CHECK-NEXT: fcvtzs z2.s, p0/m, z4.h		; CHECK-NEXT: fcvtzs z2.s, p0/m, z4.h
		; CHECK-NEXT: movprfx z3, z5
; CHECK-NEXT: fcvtzs z3.s, p0/m, z5.h		; CHECK-NEXT: fcvtzs z3.s, p0/m, z5.h
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = fptosi <vscale x 16 x half> %a to <vscale x 16 x i32>		%res = fptosi <vscale x 16 x half> %a to <vscale x 16 x i32>
ret <vscale x 16 x i32> %res		ret <vscale x 16 x i32> %res
}		}

; FP_TO_UINT		; FP_TO_UINT

Show All 9 Lines	; CHECK-NEXT: ret
%res = fptoui <vscale x 4 x double> %a to <vscale x 4 x i32>		%res = fptoui <vscale x 4 x double> %a to <vscale x 4 x i32>
ret <vscale x 4 x i32> %res		ret <vscale x 4 x i32> %res
}		}

; Split result		; Split result
define <vscale x 4 x i64> @fcvtzu_d_nxv4f32(<vscale x 4 x float> %a) {		define <vscale x 4 x i64> @fcvtzu_d_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fcvtzu_d_nxv4f32:		; CHECK-LABEL: fcvtzu_d_nxv4f32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpklo z1.d, z0.s		; CHECK-NEXT: uunpklo z1.d, z0.s
		; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpkhi z2.d, z0.s		; CHECK-NEXT: uunpkhi z2.d, z0.s
		; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.s		; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.s
		; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: fcvtzu z1.d, p0/m, z2.s		; CHECK-NEXT: fcvtzu z1.d, p0/m, z2.s
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = fptoui <vscale x 4 x float> %a to <vscale x 4 x i64>		%res = fptoui <vscale x 4 x float> %a to <vscale x 4 x i64>
ret <vscale x 4 x i64> %res		ret <vscale x 4 x i64> %res
}		}

; SINT_TO_FP		; SINT_TO_FP

Show All 26 Lines	; CHECK-NEXT: ret
ret <vscale x 8 x half> %res		ret <vscale x 8 x half> %res
}		}

; Split result		; Split result
define <vscale x 16 x float> @scvtf_s_nxv16i8(<vscale x 16 x i8> %a) {		define <vscale x 16 x float> @scvtf_s_nxv16i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: scvtf_s_nxv16i8:		; CHECK-LABEL: scvtf_s_nxv16i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: sunpklo z1.h, z0.b		; CHECK-NEXT: sunpklo z1.h, z0.b
; CHECK-NEXT: sunpkhi z0.h, z0.b
; CHECK-NEXT: ptrue p0.s		; CHECK-NEXT: ptrue p0.s
		; CHECK-NEXT: sunpkhi z0.h, z0.b
; CHECK-NEXT: sunpklo z2.s, z1.h		; CHECK-NEXT: sunpklo z2.s, z1.h
; CHECK-NEXT: sunpkhi z1.s, z1.h		; CHECK-NEXT: sunpkhi z1.s, z1.h
; CHECK-NEXT: sunpklo z3.s, z0.h		; CHECK-NEXT: sunpklo z3.s, z0.h
		; CHECK-NEXT: scvtf z1.s, p0/m, z1.s
; CHECK-NEXT: sunpkhi z4.s, z0.h		; CHECK-NEXT: sunpkhi z4.s, z0.h
		; CHECK-NEXT: movprfx z0, z2
; CHECK-NEXT: scvtf z0.s, p0/m, z2.s		; CHECK-NEXT: scvtf z0.s, p0/m, z2.s
; CHECK-NEXT: scvtf z1.s, p0/m, z1.s		; CHECK-NEXT: movprfx z2, z3
; CHECK-NEXT: scvtf z2.s, p0/m, z3.s		; CHECK-NEXT: scvtf z2.s, p0/m, z3.s
		; CHECK-NEXT: movprfx z3, z4
; CHECK-NEXT: scvtf z3.s, p0/m, z4.s		; CHECK-NEXT: scvtf z3.s, p0/m, z4.s
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = sitofp <vscale x 16 x i8> %a to <vscale x 16 x float>		%res = sitofp <vscale x 16 x i8> %a to <vscale x 16 x float>
ret <vscale x 16 x float> %res		ret <vscale x 16 x float> %res
}		}

define <vscale x 4 x double> @scvtf_d_nxv4i32(<vscale x 4 x i32> %a) {		define <vscale x 4 x double> @scvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: scvtf_d_nxv4i32:		; CHECK-LABEL: scvtf_d_nxv4i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: sunpklo z1.d, z0.s		; CHECK-NEXT: sunpklo z1.d, z0.s
		; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: sunpkhi z2.d, z0.s		; CHECK-NEXT: sunpkhi z2.d, z0.s
		; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: scvtf z0.d, p0/m, z1.d		; CHECK-NEXT: scvtf z0.d, p0/m, z1.d
		; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: scvtf z1.d, p0/m, z2.d		; CHECK-NEXT: scvtf z1.d, p0/m, z2.d
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = sitofp <vscale x 4 x i32> %a to <vscale x 4 x double>		%res = sitofp <vscale x 4 x i32> %a to <vscale x 4 x double>
ret <vscale x 4 x double> %res		ret <vscale x 4 x double> %res
}		}

define <vscale x 4 x double> @scvtf_d_nxv4i1(<vscale x 4 x i1> %a) {		define <vscale x 4 x double> @scvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: scvtf_d_nxv4i1:		; CHECK-LABEL: scvtf_d_nxv4i1:
Show All 40 Lines	; CHECK-NEXT: ret
%res = uitofp <vscale x 8 x i64> %a to <vscale x 8 x half>		%res = uitofp <vscale x 8 x i64> %a to <vscale x 8 x half>
ret <vscale x 8 x half> %res		ret <vscale x 8 x half> %res
}		}

; Split result		; Split result
define <vscale x 4 x double> @ucvtf_d_nxv4i32(<vscale x 4 x i32> %a) {		define <vscale x 4 x double> @ucvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: ucvtf_d_nxv4i32:		; CHECK-LABEL: ucvtf_d_nxv4i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpklo z1.d, z0.s		; CHECK-NEXT: uunpklo z1.d, z0.s
		; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpkhi z2.d, z0.s		; CHECK-NEXT: uunpkhi z2.d, z0.s
		; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: ucvtf z0.d, p0/m, z1.d		; CHECK-NEXT: ucvtf z0.d, p0/m, z1.d
		; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: ucvtf z1.d, p0/m, z2.d		; CHECK-NEXT: ucvtf z1.d, p0/m, z2.d
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = uitofp <vscale x 4 x i32> %a to <vscale x 4 x double>		%res = uitofp <vscale x 4 x i32> %a to <vscale x 4 x double>
ret <vscale x 4 x double> %res		ret <vscale x 4 x double> %res
}		}

define <vscale x 4 x double> @ucvtf_d_nxv4i1(<vscale x 4 x i1> %a) {		define <vscale x 4 x double> @ucvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: ucvtf_d_nxv4i1:		; CHECK-LABEL: ucvtf_d_nxv4i1:
Show All 12 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Remove false register dependency for unary FP convert operations
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 405617

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/lib/Target/AArch64/SVEInstrFormats.td

llvm/test/CodeGen/AArch64/sve-fcvt.ll

llvm/test/CodeGen/AArch64/sve-fpext-load.ll

llvm/test/CodeGen/AArch64/sve-split-fcvt.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Remove false register dependency for unary FP convert operationsClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 405617

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/lib/Target/AArch64/SVEInstrFormats.td

llvm/test/CodeGen/AArch64/sve-fcvt.ll

llvm/test/CodeGen/AArch64/sve-fpext-load.ll

llvm/test/CodeGen/AArch64/sve-split-fcvt.ll

[AArch64][SVE] Remove false register dependency for unary FP convert operations
ClosedPublic