diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -2368,12 +2368,12 @@ defm : ld1rq_pat; defm : ld1rq_pat; - def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (SXTW_ZPmZ_UNDEF_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>; - def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (SXTH_ZPmZ_UNDEF_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>; - def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i8), (SXTB_ZPmZ_UNDEF_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>; - def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (SXTH_ZPmZ_UNDEF_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>; - def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i8), (SXTB_ZPmZ_UNDEF_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>; - def : Pat<(sext_inreg (nxv8i16 ZPR:$Zs), nxv8i8), (SXTB_ZPmZ_UNDEF_H (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$Zs)>; + def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (SXTW_ZPmZ_D_UNDEF (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>; + def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (SXTH_ZPmZ_D_UNDEF (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>; + def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i8), (SXTB_ZPmZ_D_UNDEF (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>; + def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (SXTH_ZPmZ_S_UNDEF (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>; + def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i8), (SXTB_ZPmZ_S_UNDEF (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>; + def : Pat<(sext_inreg (nxv8i16 ZPR:$Zs), nxv8i8), (SXTB_ZPmZ_H_UNDEF (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$Zs)>; // General case that we ideally never want to match. 
def : Pat<(vscale GPR64:$scale), (MADDXrrr (UBFMXri (RDVLI_XI 1), 4, 63), $scale, XZR)>; diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td @@ -1998,174 +1998,172 @@ // Loop control, based on GPR def : InstRW<[V2Write_3cyc_2M], - (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>; -def : InstRW<[V2Write_3cyc_2M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]$")>; + (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]")>; +def : InstRW<[V2Write_3cyc_2M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]")>; // Loop terminate -def : InstRW<[V2Write_1cyc_2M], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>; +def : InstRW<[V2Write_1cyc_2M], (instregex "^CTERM(EQ|NE)_(WW|XX)")>; // Predicate counting scalar def : InstRW<[V2Write_2cyc_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>; def : InstRW<[V2Write_2cyc_1M], - (instregex "^(CNT|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI$", - "^SQ(DEC|INC)[BHWD]_XPiWdI$", - "^UQ(DEC|INC)[BHWD]_WPiI$")>; + (instregex "^(CNT|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI", + "^SQ(DEC|INC)[BHWD]_XPiWdI", + "^UQ(DEC|INC)[BHWD]_WPiI")>; // Predicate counting scalar, ALL, {1,2,4} -def : InstRW<[V2Write_IncDec], (instregex "^(DEC|INC)[BHWD]_XPiI$")>; +def : InstRW<[V2Write_IncDec], (instregex "^(DEC|INC)[BHWD]_XPiI")>; // Predicate counting scalar, active predicate def : InstRW<[V2Write_2cyc_1M], - (instregex "^CNTP_XPP_[BHSD]$", - "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]$", - "^(UQDEC|UQINC)P_WP_[BHSD]$", - "^(SQDEC|SQINC)P_XPWd_[BHSD]$")>; + (instregex "^CNTP_XPP_[BHSD]", + "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]", + "^(UQDEC|UQINC)P_WP_[BHSD]", + "^(SQDEC|SQINC)P_XPWd_[BHSD]")>; // Predicate counting vector, active predicate def : InstRW<[V2Write_7cyc_1M_1M0_1V], - (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]$")>; + (instregex 
"^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]")>; // Predicate logical def : InstRW<[V2Write_1or2cyc_1M0], - (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>; + (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP")>; // Predicate logical, flag setting def : InstRW<[V2Write_1or2cyc_1M0_1M], - (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP$")>; + (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP")>; // Predicate reverse -def : InstRW<[V2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]")>; // Predicate select def : InstRW<[V2Write_1cyc_1M0], (instrs SEL_PPPP)>; // Predicate set -def : InstRW<[V2Write_2cyc_1M], (instregex "^PFALSE$", "^PTRUE_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1M], (instregex "^PFALSE", "^PTRUE_[BHSD]")>; // Predicate set/initialize, set flags -def : InstRW<[V2Write_3cyc_2M], (instregex "^PTRUES_[BHSD]$")>; +def : InstRW<[V2Write_3cyc_2M], (instregex "^PTRUES_[BHSD]")>; // Predicate find first/next -def : InstRW<[V2Write_2cyc_1M], (instregex "^PFIRST_B$", "^PNEXT_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1M], (instregex "^PFIRST_B", "^PNEXT_[BHSD]")>; // Predicate test def : InstRW<[V2Write_1cyc_1M], (instrs PTEST_PP)>; // Predicate transpose -def : InstRW<[V2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSD]")>; // Predicate unpack and widen def : InstRW<[V2Write_2cyc_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>; // Predicate zip/unzip -def : InstRW<[V2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSD]")>; // SVE integer instructions // ----------------------------------------------------------------------------- // Arithmetic, absolute diff -def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]$", - "^[SU]ABD_ZPZZ_UNDEF_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]", + 
"^[SU]ABD_ZPZZ_[BHSD]")>; // Arithmetic, absolute diff accum -def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABA_ZZZ_[BHSD]$")>; +def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABA_ZZZ_[BHSD]")>; // Arithmetic, absolute diff accum long -def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]$")>; +def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]")>; // Arithmetic, absolute diff long -def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]")>; // Arithmetic, basic def : InstRW<[V2Write_2cyc_1V], - (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]$", - "^(ABS|CNOT|NEG)_ZPmZ_UNDEF_[BHSD]$", - "^(ADD|SUB)_ZZZ_[BHSD]$", - "^(ADD|SUB|SUBR)_ZI_[BHSD]$", - "^ADR_[SU]XTW_ZZZ_D_[0123]$", - "^ADR_LSL_ZZZ_[SD]_[0123]$", - "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]$", - "^SADDLBT_ZZZ_[HSD]$", - "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]$", - "^SSUBL(BT|TB)_ZZZ_[HSD]$")>; + (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]", + "^(ADD|SUB)_ZZZ_[BHSD]", + "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]", + "^(ADD|SUB|SUBR)_ZI_[BHSD]", + "^ADR_[SU]XTW_ZZZ_D_[0123]", + "^ADR_LSL_ZZZ_[SD]_[0123]", + "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]", + "^SADDLBT_ZZZ_[HSD]", + "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]", + "^SSUBL(BT|TB)_ZZZ_[HSD]")>; // Arithmetic, complex def : InstRW<[V2Write_2cyc_1V], - (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]$", - "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]$", - "^SQ(ABS|NEG)_ZPmZ_UNDEF_[BHSD]$", - "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]$", - "^[SU]Q(ADD|SUB)_ZI_[BHSD]$", - "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]$", - "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]$")>; + (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]", + "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]", + "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]", + "^[SU]Q(ADD|SUB)_ZI_[BHSD]", + "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]", + "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>; // Arithmetic, large integer -def : InstRW<[V2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]$")>; +def : 
InstRW<[V2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>; // Arithmetic, pairwise add -def : InstRW<[V2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]")>; // Arithmetic, pairwise add and accum long def : InstRW<[V2Wr_ZPA, ReadDefault, V2Rd_ZPA], - (instregex "^[SU]ADALP_ZPmZ_[HSD]$")>; + (instregex "^[SU]ADALP_ZPmZ_[HSD]")>; // Arithmetic, shift def : InstRW<[V2Write_2cyc_1V13], - (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]$", - "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]$", - "^(ASR|LSL|LSR)_ZPmI_[BHSD]$", - "^(ASR|LSL|LSR)_ZPmZ_[BHSD]$", - "^(ASR|LSL|LSR)_ZZI_[BHSD]$", - "^(ASR|LSL|LSR)_ZPZ[IZ]_UNDEF_[BHSD]$", - "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]$")>; + (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]", + "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]", + "^(ASR|LSL|LSR)_ZPmI_[BHSD]", + "^(ASR|LSL|LSR)_ZPmZ_[BHSD]", + "^(ASR|LSL|LSR)_ZZI_[BHSD]", + "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]", + "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>; // Arithmetic, shift and accumulate -def : InstRW<[V2Wr_ZSA, V2Rd_ZSA], (instregex "^[SU]R?SRA_ZZI_[BHSD]$")>; +def : InstRW<[V2Wr_ZSA, V2Rd_ZSA], (instregex "^[SU]R?SRA_ZZI_[BHSD]")>; // Arithmetic, shift by immediate -def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHRN[BT]_ZZI_[BHS]$", - "^[SU]SHLL[BT]_ZZI_[HSD]$")>; +def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHRN[BT]_ZZI_[BHS]", + "^[SU]SHLL[BT]_ZZI_[HSD]")>; // Arithmetic, shift by immediate and insert -def : InstRW<[V2Write_2cyc_1V13], (instregex "^(SLI|SRI)_ZZI_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1V13], (instregex "^(SLI|SRI)_ZZI_[BHSD]")>; // Arithmetic, shift complex def : InstRW<[V2Write_4cyc_1V13], - (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]$", - "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]$", - "^[SU]QR?SHL_ZPZZ_UNDEF_[BHSD]$", - "^(SQSHL|SQSHLU|UQSHL)_ZPmI_[BHSD]$", - "^SQSHRU?N[BT]_ZZI_[BHS]$", - "^UQR?SHRN[BT]_ZZI_[BHS]$")>; + (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]", + 
"^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]", + "^[SU]QR?SHL_ZPZZ_[BHSD]", + "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]", + "^SQSHRU?N[BT]_ZZI_[BHS]", + "^UQR?SHRN[BT]_ZZI_[BHS]")>; // Arithmetic, shift right for divide -def : InstRW<[V2Write_4cyc_1V13], (instregex "^ASRD_ZPmI_[BHSD]$")>; +def : InstRW<[V2Write_4cyc_1V13], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>; // Arithmetic, shift rounding -def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]$", - "^[SU]RSHL_ZPZZ_UNDEF_[BHSD]$", - "^[SU]RSHR_ZPmI_[BHSD]$")>; +def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]", + "^[SU]RSHL_ZPZZ_[BHSD]", + "^[SU]RSHR_(ZPmI|ZPZI)_[BHSD]")>; // Bit manipulation -def : InstRW<[V2Write_6cyc_2V1], (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]$")>; +def : InstRW<[V2Write_6cyc_2V1], (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]")>; // Bitwise select -def : InstRW<[V2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ")>; // Count/reverse bits -def : InstRW<[V2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$", - "^(CLS|CLZ|CNT)_ZPmZ_UNDEF_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>; // Broadcast logical bitmask immediate to vector def : InstRW<[V2Write_2cyc_1V], (instrs DUPM_ZI)>; // Compare and set flags def : InstRW<[V2Write_4or5cyc_1V0_1M0], - (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$", - "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>; + (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]", + "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>; // Complex add -def : InstRW<[V2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]")>; // Complex dot product 8-bit element def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>; @@ -2174,201 +2172,201 @@ def : 
InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>; // Complex multiply-add B, H, S element size -def : InstRW<[V2Wr_ZCMABHS, V2Rd_ZCMABHS], (instregex "^CMLA_ZZZ_[BHS]$", - "^CMLA_ZZZI_[HS]$")>; +def : InstRW<[V2Wr_ZCMABHS, V2Rd_ZCMABHS], (instregex "^CMLA_ZZZ_[BHS]", + "^CMLA_ZZZI_[HS]")>; // Complex multiply-add D element size def : InstRW<[V2Wr_ZCMAD, V2Rd_ZCMAD], (instrs CMLA_ZZZ_D)>; // Conditional extract operations, scalar form -def : InstRW<[V2Write_8cyc_1M0_1V01], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>; +def : InstRW<[V2Write_8cyc_1M0_1V01], (instregex "^CLAST[AB]_RPZ_[BHSD]")>; // Conditional extract operations, SIMD&FP scalar and vector forms -def : InstRW<[V2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$", - "^COMPACT_ZPZ_[SD]$", - "^SPLICE_ZPZZ?_[BHSD]$")>; +def : InstRW<[V2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]", + "^COMPACT_ZPZ_[SD]", + "^SPLICE_ZPZZ?_[BHSD]")>; // Convert to floating point, 64b to float or convert to double -def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD](_UNDEF)?$", - "^[SU]CVTF_ZPmZ_StoD(_UNDEF)?$")>; +def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]", + "^[SU]CVTF_ZPmZ_StoD")>; // Convert to floating point, 32b to single or half -def : InstRW<[V2Write_4cyc_2V02], (instregex "^[SU]CVTF_ZPmZ_Sto[HS](_UNDEF)?$")>; +def : InstRW<[V2Write_4cyc_2V02], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>; // Convert to floating point, 16b to half -def : InstRW<[V2Write_6cyc_4V02], (instregex "^[SU]CVTF_ZPmZ_HtoH(_UNDEF)?$")>; +def : InstRW<[V2Write_6cyc_4V02], (instregex "^[SU]CVTF_ZPmZ_HtoH")>; // Copy, scalar -def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]$")>; +def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]")>; // Copy, scalar SIMD&FP or imm -def : InstRW<[V2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]$", - "^CPY_ZPzI_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]", + "^CPY_ZPzI_[BHSD]")>; // 
Divides, 32 bit -def : InstRW<[V2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S$", - "^[SU]DIV_ZPZZ_UNDEF_S$")>; +def : InstRW<[V2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S", + "^[SU]DIV_ZPZZ_S")>; // Divides, 64 bit -def : InstRW<[V2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D$", - "^[SU]DIV_ZPZZ_UNDEF_D$")>; +def : InstRW<[V2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D", + "^[SU]DIV_ZPZZ_D")>; // Dot product, 8 bit -def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_S$")>; +def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_S")>; // Dot product, 8 bit, using signed and unsigned integers def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>; // Dot product, 16 bit -def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_D$")>; +def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_D")>; // Duplicate, immediate and indexed form -def : InstRW<[V2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]$", - "^DUP_ZZI_[BHSDQ]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]", + "^DUP_ZZI_[BHSDQ]")>; // Duplicate, scalar form -def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]$")>; +def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]")>; // Extend, sign or zero -def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]XTB_ZPmZ(_UNDEF)?_[HSD]$", - "^[SU]XTH_ZPmZ(_UNDEF)?_[SD]$", - "^[SU]XTW_ZPmZ(_UNDEF)?_[D]$")>; +def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]XTB_ZPmZ_[HSD]", + "^[SU]XTH_ZPmZ_[SD]", + "^[SU]XTW_ZPmZ_[D]")>; // Extract def : InstRW<[V2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>; // Extract narrow saturating -def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]$", - "^SQXTUN[BT]_ZZ_[BHS]$")>; +def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]", + "^SQXTUN[BT]_ZZ_[BHS]")>; // Extract/insert operation, SIMD and FP scalar form -def : InstRW<[V2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]$", - 
"^INSR_ZV_[BHSD]$")>; +def : InstRW<[V2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]", + "^INSR_ZV_[BHSD]")>; // Extract/insert operation, scalar -def : InstRW<[V2Write_6cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]$", - "^INSR_ZR_[BHSD]$")>; +def : InstRW<[V2Write_6cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]", + "^INSR_ZR_[BHSD]")>; // Histogram operations -def : InstRW<[V2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]$", - "^HISTSEG_ZZZ$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]", + "^HISTSEG_ZZZ")>; // Horizontal operations, B, H, S form, immediate operands only -def : InstRW<[V2Write_4cyc_1V02], (instregex "^INDEX_II_[BHS]$")>; +def : InstRW<[V2Write_4cyc_1V02], (instregex "^INDEX_II_[BHS]")>; // Horizontal operations, B, H, S form, scalar, immediate operands/ scalar // operands only / immediate, scalar operands -def : InstRW<[V2Write_7cyc_1M0_1V02], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>; +def : InstRW<[V2Write_7cyc_1M0_1V02], (instregex "^INDEX_(IR|RI|RR)_[BHS]")>; // Horizontal operations, D form, immediate operands only def : InstRW<[V2Write_5cyc_2V02], (instrs INDEX_II_D)>; // Horizontal operations, D form, scalar, immediate operands)/ scalar operands // only / immediate, scalar operands -def : InstRW<[V2Write_8cyc_2M0_2V02], (instregex "^INDEX_(IR|RI|RR)_D$")>; +def : InstRW<[V2Write_8cyc_2M0_2V02], (instregex "^INDEX_(IR|RI|RR)_D")>; // Logical def : InstRW<[V2Write_2cyc_1V], - (instregex "^(AND|EOR|ORR)_ZI$", - "^(AND|BIC|EOR|ORR)_ZZZ$", - "^EOR(BT|TB)_ZZZ_[BHSD]$", - "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$", - "^NOT_ZPmZ_UNDEF_[BHSD]$")>; + (instregex "^(AND|EOR|ORR)_ZI", + "^(AND|BIC|EOR|ORR)_ZZZ", + "^EOR(BT|TB)_ZZZ_[BHSD]", + "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]", + "^(AND|BIC|EOR|NOT|ORR)_ZPZZ_[BHSD]", + "^NOT_ZPmZ_[BHSD]")>; // Max/min, basic and pairwise -def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]$", - "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]$", - "^[SU](MAX|MIN)_ZPZZ_UNDEF_[BHSD]$")>; +def : 
InstRW<[V2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]", + "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]", + "^[SU](MAX|MIN)_ZPZZ_[BHSD]")>; // Matching operations // FIXME: SOG p. 44, n. 5: If the consuming instruction has a flag source, the // latency for this instruction is 4 cycles. -def : InstRW<[V2Write_2or3cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]$")>; +def : InstRW<[V2Write_2or3cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]")>; // Matrix multiply-accumulate def : InstRW<[V2Wr_ZMMA, V2Rd_ZMMA], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>; // Move prefix -def : InstRW<[V2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$", - "^MOVPRFX_ZZ$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]", + "^MOVPRFX_ZZ")>; // Multiply, B, H, S element size -def : InstRW<[V2Write_4cyc_1V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]$", - "^MUL_ZPZZ_UNDEF_[BHS]$", - "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$", - "^[SU]MULH_ZPZZ_UNDEF_[BHS]$")>; +def : InstRW<[V2Write_4cyc_1V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]", + "^MUL_ZPZZ_[BHS]", + "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]", + "^[SU]MULH_ZPZZ_[BHS]")>; // Multiply, D element size -def : InstRW<[V2Write_5cyc_2V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D$", - "^MUL_ZPZZ_UNDEF_D$", - "^[SU]MULH_(ZPmZ|ZZZ)_D$", - "^[SU]MULH_ZPZZ_UNDEF_D$")>; +def : InstRW<[V2Write_5cyc_2V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D", + "^MUL_ZPZZ_D", + "^[SU]MULH_(ZPmZ|ZZZ)_D", + "^[SU]MULH_ZPZZ_D")>; // Multiply long -def : InstRW<[V2Write_4cyc_1V02], (instregex "^[SU]MULL[BT]_ZZZI_[SD]$", - "^[SU]MULL[BT]_ZZZ_[HSD]$")>; +def : InstRW<[V2Write_4cyc_1V02], (instregex "^[SU]MULL[BT]_ZZZI_[SD]", + "^[SU]MULL[BT]_ZZZ_[HSD]")>; // Multiply accumulate, B, H, S element size def : InstRW<[V2Wr_ZMABHS, V2Rd_ZMABHS], - (instregex "^ML[AS]_ZZZI_[HS]$", "^ML[AS]_ZPZZZ_UNDEF_[BHS]$")>; + (instregex "^ML[AS]_ZZZI_[HS]", "^ML[AS]_ZPZZZ_[BHS]")>; def : InstRW<[V2Wr_ZMABHS, ReadDefault, V2Rd_ZMABHS], - (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]$")>; + (instregex 
"^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>; // Multiply accumulate, D element size def : InstRW<[V2Wr_ZMAD, V2Rd_ZMAD], - (instregex "^ML[AS]_ZZZI_D$", "^ML[AS]_ZPZZZ_UNDEF_D$")>; + (instregex "^ML[AS]_ZZZI_D", "^ML[AS]_ZPZZZ_D")>; def : InstRW<[V2Wr_ZMAD, ReadDefault, V2Rd_ZMAD], - (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D$")>; + (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>; // Multiply accumulate long -def : InstRW<[V2Wr_ZMAL, V2Rd_ZMAL], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]$", - "^[SU]ML[AS]L[BT]_ZZZI_[SD]$")>; +def : InstRW<[V2Wr_ZMAL, V2Rd_ZMAL], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]", + "^[SU]ML[AS]L[BT]_ZZZI_[SD]")>; // Multiply accumulate saturating doubling long regular def : InstRW<[V2Wr_ZMASQL, V2Rd_ZMASQ], - (instregex "^SQDML[AS]L(B|T|BT)_ZZZ_[HSD]$", - "^SQDML[AS]L[BT]_ZZZI_[SD]$")>; + (instregex "^SQDML[AS]L(B|T|BT)_ZZZ_[HSD]", + "^SQDML[AS]L[BT]_ZZZI_[SD]")>; // Multiply saturating doubling high, B, H, S element size -def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULH_ZZZ_[BHS]$", - "^SQDMULH_ZZZI_[HS]$")>; +def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULH_ZZZ_[BHS]", + "^SQDMULH_ZZZI_[HS]")>; // Multiply saturating doubling high, D element size def : InstRW<[V2Write_5cyc_2V02], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>; // Multiply saturating doubling long -def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULL[BT]_ZZZ_[HSD]$", - "^SQDMULL[BT]_ZZZI_[SD]$")>; +def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULL[BT]_ZZZ_[HSD]", + "^SQDMULL[BT]_ZZZI_[SD]")>; // Multiply saturating rounding doubling regular/complex accumulate, B, H, S // element size -def : InstRW<[V2Wr_ZMASQBHS, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZ_[BHS]$", - "^SQRDCMLAH_ZZZ_[BHS]$", - "^SQRDML[AS]H_ZZZI_[HS]$", - "^SQRDCMLAH_ZZZI_[HS]$")>; +def : InstRW<[V2Wr_ZMASQBHS, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZ_[BHS]", + "^SQRDCMLAH_ZZZ_[BHS]", + "^SQRDML[AS]H_ZZZI_[HS]", + "^SQRDCMLAH_ZZZI_[HS]")>; // Multiply saturating rounding doubling regular/complex accumulate, D element // 
size -def : InstRW<[V2Wr_ZMASQD, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZI?_D$", - "^SQRDCMLAH_ZZZ_D$")>; +def : InstRW<[V2Wr_ZMASQD, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZI?_D", + "^SQRDCMLAH_ZZZ_D")>; // Multiply saturating rounding doubling regular/complex, B, H, S element size -def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQRDMULH_ZZZ_[BHS]$", - "^SQRDMULH_ZZZI_[HS]$")>; +def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQRDMULH_ZZZ_[BHS]", + "^SQRDMULH_ZZZI_[HS]")>; // Multiply saturating rounding doubling regular/complex, D element size -def : InstRW<[V2Write_5cyc_2V02], (instregex "^SQRDMULH_ZZZI?_D$")>; +def : InstRW<[V2Write_5cyc_2V02], (instregex "^SQRDMULH_ZZZI?_D")>; // Multiply/multiply long, (8x8) polynomial -def : InstRW<[V2Write_2cyc_1V23], (instregex "^PMUL_ZZZ_B$", - "^PMULL[BT]_ZZZ_[HDQ]$")>; +def : InstRW<[V2Write_2cyc_1V23], (instregex "^PMUL_ZZZ_B", + "^PMULL[BT]_ZZZ_[HDQ]")>; // Predicate counting vector -def : InstRW<[V2Write_2cyc_1V], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI")>; // Reciprocal estimate -def : InstRW<[V2Write_4cyc_2V02], (instrs URECPE_ZPmZ_S, URSQRTE_ZPmZ_S, - URECPE_ZPmZ_UNDEF_S, URSQRTE_ZPmZ_UNDEF_S)>; +def : InstRW<[V2Write_4cyc_2V02], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>; // Reduction, arithmetic, B form def : InstRW<[V2Write_9cyc_2V_4V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>; @@ -2383,47 +2381,47 @@ def : InstRW<[V2Write_4cyc_2V], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>; // Reduction, logical -def : InstRW<[V2Write_6cyc_1V_1V13], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]$")>; +def : InstRW<[V2Write_6cyc_1V_1V13], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]")>; // Reverse, vector -def : InstRW<[V2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]$", - "^REVB_ZPmZ_[HSD]$", - "^REVH_ZPmZ_[SD]$", - "^REVW_ZPmZ_D$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]", + "^REVB_ZPmZ_[HSD]", + "^REVH_ZPmZ_[SD]", + "^REVW_ZPmZ_D")>; // 
Select, vector form -def : InstRW<[V2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]")>; // Table lookup -def : InstRW<[V2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]")>; // Table lookup extension -def : InstRW<[V2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]")>; // Transpose, vector form -def : InstRW<[V2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]")>; // Unpack and extend -def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]")>; // Zip/unzip -def : InstRW<[V2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]")>; // SVE floating-point instructions // ----------------------------------------------------------------------------- // Floating point absolute value/difference -def : InstRW<[V2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]$", - "^FABD_ZPZZ_UNDEF_[HSD]$", - "^FABS_ZPmZ_UNDEF_[HSD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]", + "^FABD_ZPZZ_[HSD]", + "^FABS_ZPmZ_[HSD]")>; // Floating point arithmetic -def : InstRW<[V2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$", - "^F(ADD|SUB)_ZPZ[IZ]_UNDEF_[HSD]$", - "^FADDP_ZPmZZ_[HSD]$", - "^FNEG_ZPmZ(_UNDEF)?_[HSD]$", - "^FSUBR_ZPm[IZ]_[HSD]$", - "^FSUBR_ZPZI_UNDEF_[HSD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]", + "^F(ADD|SUB)_ZPZ[IZ]_[HSD]", + "^FADDP_ZPmZZ_[HSD]", + "^FNEG_ZPmZ_[HSD]", + "^FSUBR_ZPm[IZ]_[HSD]", + "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>; // Floating point associative add, F16 def : InstRW<[V2Write_10cyc_1V1_9rc], (instrs FADDA_VPZ_H)>; @@ -2435,144 +2433,138 @@ def : 
InstRW<[V2Write_4cyc_1V], (instrs FADDA_VPZ_D)>; // Floating point compare -def : InstRW<[V2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]$", - "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]$", - "^FCM(LE|LT)_PPzZ0_[HSD]$", - "^FCMUO_PPzZZ_[HSD]$")>; +def : InstRW<[V2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]", + "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]", + "^FCM(LE|LT)_PPzZ0_[HSD]", + "^FCMUO_PPzZZ_[HSD]")>; // Floating point complex add -def : InstRW<[V2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]$")>; +def : InstRW<[V2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]")>; // Floating point complex multiply add -def : InstRW<[V2Wr_ZFCMA, ReadDefault, V2Rd_ZFCMA], (instregex "^FCMLA_ZPmZZ_[HSD]$")>; -def : InstRW<[V2Wr_ZFCMA, V2Rd_ZFCMA], (instregex "^FCMLA_ZZZI_[HS]$")>; +def : InstRW<[V2Wr_ZFCMA, ReadDefault, V2Rd_ZFCMA], (instregex "^FCMLA_ZPmZZ_[HSD]")>; +def : InstRW<[V2Wr_ZFCMA, V2Rd_ZFCMA], (instregex "^FCMLA_ZZZI_[HS]")>; // Floating point convert, long or narrow (F16 to F32 or F32 to F16) -def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT_ZPmZ_(HtoS|StoH)(_UNDEF)?$", - "^FCVTLT_ZPmZ_HtoS$", - "^FCVTNT_ZPmZ_StoH$")>; +def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT_ZPmZ_(HtoS|StoH)", + "^FCVTLT_ZPmZ_HtoS", + "^FCVTNT_ZPmZ_StoH")>; // Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 // or F64 to F16) -def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)(_UNDEF)?$", - "^FCVTLT_ZPmZ_StoD$", - "^FCVTNT_ZPmZ_DtoS$")>; +def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)", + "^FCVTLT_ZPmZ_StoD", + "^FCVTNT_ZPmZ_DtoS")>; // Floating point convert, round to odd def : InstRW<[V2Write_3cyc_1V02], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>; // Floating point base2 log, F16 -def : InstRW<[V2Write_6cyc_4V02], (instrs FLOGB_ZPmZ_H)>; +def : InstRW<[V2Write_6cyc_4V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>; // Floating point base2 log, F32 -def : InstRW<[V2Write_4cyc_2V02], (instrs FLOGB_ZPmZ_S)>; 
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>; // Floating point base2 log, F64 -def : InstRW<[V2Write_3cyc_1V02], (instrs FLOGB_ZPmZ_D)>; +def : InstRW<[V2Write_3cyc_1V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>; // Floating point convert to integer, F16 -def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVTZ[SU]_ZPmZ_HtoH(_UNDEF)?$")>; +def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>; // Floating point convert to integer, F32 -def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)(_UNDEF)?$")>; +def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>; // Floating point convert to integer, F64 def : InstRW<[V2Write_3cyc_1V02], - (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)(_UNDEF)?$")>; + (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>; // Floating point copy -def : InstRW<[V2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]$", - "^FDUP_ZI_[HSD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]", + "^FDUP_ZI_[HSD]")>; // Floating point divide, F16 -def : InstRW<[V2Write_13cyc_1V02_12rc], (instregex "^FDIVR?_ZPmZ_H$", - "^FDIV_ZPZZ_UNDEF_H$")>; +def : InstRW<[V2Write_13cyc_1V02_12rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>; // Floating point divide, F32 -def : InstRW<[V2Write_10cyc_1V02_9rc], (instregex "^FDIVR?_ZPmZ_S$", - "^FDIV_ZPZZ_UNDEF_S$")>; +def : InstRW<[V2Write_10cyc_1V02_9rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>; // Floating point divide, F64 -def : InstRW<[V2Write_15cyc_1V02_14rc], (instregex "^FDIVR?_ZPmZ_D$", - "^FDIV_ZPZZ_UNDEF_D$")>; +def : InstRW<[V2Write_15cyc_1V02_14rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>; // Floating point min/max pairwise -def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>; // Floating point min/max -def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$", - "^F(MAX|MIN)(NM)?_ZPZ[IZ]_UNDEF_[HSD]$")>; 
+def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]", + "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>; // Floating point multiply -def : InstRW<[V2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]$", - "^FMULX_ZPZZ_UNDEF_[HSD]$", - "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$", - "^FMUL_ZPZ[IZ]_UNDEF_[HSD]$")>; +def : InstRW<[V2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]", + "^FMULX_ZPZZ_[HSD]", + "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]", + "^FMUL_ZPZ[IZ]_[HSD]")>; // Floating point multiply accumulate def : InstRW<[V2Wr_ZFMA, ReadDefault, V2Rd_ZFMA], - (instregex "^FN?ML[AS]_ZPmZZ_[HSD]$", - "^FN?(MAD|MSB)_ZPmZZ_[HSD]$")>; + (instregex "^FN?ML[AS]_ZPmZZ_[HSD]", + "^FN?(MAD|MSB)_ZPmZZ_[HSD]")>; def : InstRW<[V2Wr_ZFMA, V2Rd_ZFMA], - (instregex "^FML[AS]_ZZZI_[HSD]$", - "^FN?ML[AS]_ZPZZZ_UNDEF_[HSD]$")>; + (instregex "^FML[AS]_ZZZI_[HSD]", + "^FN?ML[AS]_ZPZZZ_[HSD]")>; // Floating point multiply add/sub accumulate long -def : InstRW<[V2Wr_ZFMAL, V2Rd_ZFMAL], (instregex "^FML[AS]L[BT]_ZZZI?_SHH$")>; +def : InstRW<[V2Wr_ZFMAL, V2Rd_ZFMAL], (instregex "^FML[AS]L[BT]_ZZZI?_SHH")>; // Floating point reciprocal estimate, F16 -def : InstRW<[V2Write_6cyc_4V02], (instrs FRECPE_ZZ_H, FRECPX_ZPmZ_H, - FRSQRTE_ZZ_H, FRECPX_ZPmZ_UNDEF_H)>; +def : InstRW<[V2Write_6cyc_4V02], (instregex "^FR(ECP|SQRT)E_ZZ_H", "^FRECPX_ZPmZ_H")>; // Floating point reciprocal estimate, F32 -def : InstRW<[V2Write_4cyc_2V02], (instrs FRECPE_ZZ_S, FRECPX_ZPmZ_S, - FRSQRTE_ZZ_S, FRECPX_ZPmZ_UNDEF_S)>; +def : InstRW<[V2Write_4cyc_2V02], (instregex "^FR(ECP|SQRT)E_ZZ_S", "^FRECPX_ZPmZ_S")>; // Floating point reciprocal estimate, F64 -def : InstRW<[V2Write_3cyc_1V02], (instrs FRECPE_ZZ_D, FRECPX_ZPmZ_D, - FRSQRTE_ZZ_D, FRECPX_ZPmZ_UNDEF_D)>; +def : InstRW<[V2Write_3cyc_1V02], (instregex "^FR(ECP|SQRT)E_ZZ_D", "^FRECPX_ZPmZ_D")>; // Floating point reciprocal step -def : InstRW<[V2Write_4cyc_1V], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>; +def : InstRW<[V2Write_4cyc_1V], (instregex
"^F(RECPS|RSQRTS)_ZZZ_[HSD]")>; // Floating point reduction, F16 def : InstRW<[V2Write_8cyc_4V], - (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H$")>; + (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H")>; // Floating point reduction, F32 def : InstRW<[V2Write_6cyc_3V], - (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S$")>; + (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S")>; // Floating point reduction, F64 def : InstRW<[V2Write_4cyc_2V], - (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D$")>; + (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D")>; // Floating point round to integral, F16 -def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ(_UNDEF)?_H$")>; +def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>; // Floating point round to integral, F32 -def : InstRW<[V2Write_4cyc_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ(_UNDEF)?_S$")>; +def : InstRW<[V2Write_4cyc_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>; // Floating point round to integral, F64 -def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ(_UNDEF)?_D$")>; +def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>; // Floating point square root, F16 -def : InstRW<[V2Write_13cyc_1V0_12rc], (instrs FSQRT_ZPmZ_H, FSQRT_ZPmZ_UNDEF_H)>; +def : InstRW<[V2Write_13cyc_1V0_12rc], (instregex "^FSQRT_ZPmZ_H")>; // Floating point square root, F32 -def : InstRW<[V2Write_10cyc_1V0_9rc], (instrs FSQRT_ZPmZ_S, FSQRT_ZPmZ_UNDEF_S)>; +def : InstRW<[V2Write_10cyc_1V0_9rc], (instregex "^FSQRT_ZPmZ_S")>; // Floating point square root, F64 -def : InstRW<[V2Write_16cyc_1V0_14rc], (instrs FSQRT_ZPmZ_D, FSQRT_ZPmZ_UNDEF_D)>; +def : InstRW<[V2Write_16cyc_1V0_14rc], (instregex "^FSQRT_ZPmZ_D")>; // Floating point trigonometric exponentiation -def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]$")>; +def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]")>; //
Floating point trigonometric multiply add -def : InstRW<[V2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]$")>; +def : InstRW<[V2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]")>; // Floating point trigonometric, miscellaneous -def : InstRW<[V2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]$")>; +def : InstRW<[V2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]")>; // SVE BFloat16 (BF16) instructions // ----------------------------------------------------------------------------- @@ -2587,7 +2579,7 @@ def : InstRW<[V2Wr_ZBFMMA, V2Rd_ZBFMMA], (instrs BFMMLA_ZZZ)>; // Multiply accumulate long -def : InstRW<[V2Wr_ZBFMAL, V2Rd_ZBFMAL], (instregex "^BFMLAL[BT]_ZZZI?$")>; +def : InstRW<[V2Wr_ZBFMAL, V2Rd_ZBFMAL], (instregex "^BFMLAL[BT]_ZZZI?")>; // SVE Load instructions // ----------------------------------------------------------------------------- diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -2111,13 +2111,13 @@ } multiclass sve_fp_2op_p_zds_zeroing_hsd { - def _ZERO_H : PredTwoOpPseudo; - def _ZERO_S : PredTwoOpPseudo; - def _ZERO_D : PredTwoOpPseudo; + def _H_ZERO : PredTwoOpPseudo; + def _S_ZERO : PredTwoOpPseudo; + def _D_ZERO : PredTwoOpPseudo; - def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_H)>; - def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_S)>; - def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_D)>; + def : SVE_3_Op_Pat_SelZero(NAME # _H_ZERO)>; + def : SVE_3_Op_Pat_SelZero(NAME # _S_ZERO)>; + def : SVE_3_Op_Pat_SelZero(NAME # _D_ZERO)>; } class sve_fp_ftmad sz, string asm, ZPRRegOp zprty> @@ -2157,36 +2157,36 @@ } multiclass sve_fp_2op_i_p_zds_hfd { - def _UNDEF_H : PredTwoOpImmPseudo; - def _UNDEF_S : PredTwoOpImmPseudo; - def _UNDEF_D : PredTwoOpImmPseudo; - - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_H")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_H")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_H")>; - def : 
SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_H")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_H")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_H")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_S")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_S")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_S")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_S")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_D")>; - def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_D")>; + def _H_UNDEF : PredTwoOpImmPseudo; + def _S_UNDEF : PredTwoOpImmPseudo; + def _D_UNDEF : PredTwoOpImmPseudo; + + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_H_UNDEF")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_H_UNDEF")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_H_UNDEF")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_H_UNDEF")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_H_UNDEF")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_H_UNDEF")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_S_UNDEF")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_S_UNDEF")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_S_UNDEF")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_S_UNDEF")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_D_UNDEF")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_D_UNDEF")>; } multiclass sve_fp_2op_i_p_zds_zeroing_hfd { - def _ZERO_H : PredTwoOpImmPseudo; - def _ZERO_S : PredTwoOpImmPseudo; - def _ZERO_D : PredTwoOpImmPseudo; + def _H_ZERO : PredTwoOpImmPseudo; + def _S_ZERO : PredTwoOpImmPseudo; + def _D_ZERO : PredTwoOpImmPseudo; let AddedComplexity = 2 in { - def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # "_ZERO_H")>; - def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # "_ZERO_H")>; - def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # "_ZERO_S")>; - def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # "_ZERO_S")>; - def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # "_ZERO_D")>; - def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # "_ZERO_D")>; + def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # "_H_ZERO")>; + def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # "_H_ZERO")>; + def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # "_S_ZERO")>; + def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # "_S_ZERO")>; + def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # 
"_D_ZERO")>; + def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # "_D_ZERO")>; } } @@ -2935,16 +2935,16 @@ def : SVE_1_Op_Passthru_Pat(NAME # _S)>; def : SVE_1_Op_Passthru_Pat(NAME # _D)>; - def _UNDEF_H : PredOneOpPassthruPseudo; - def _UNDEF_S : PredOneOpPassthruPseudo; - def _UNDEF_D : PredOneOpPassthruPseudo; + def _H_UNDEF : PredOneOpPassthruPseudo; + def _S_UNDEF : PredOneOpPassthruPseudo; + def _D_UNDEF : PredOneOpPassthruPseudo; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_H)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_H)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_H)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_S)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_S)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_D)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _H_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _H_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _H_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _S_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _S_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _D_UNDEF)>; } multiclass sve2_fp_flogb { @@ -2961,13 +2961,13 @@ } multiclass sve2_fp_un_pred_zeroing_hsd { - def _ZERO_H : PredOneOpPassthruPseudo; - def _ZERO_S : PredOneOpPassthruPseudo; - def _ZERO_D : PredOneOpPassthruPseudo; + def _H_ZERO : PredOneOpPassthruPseudo; + def _S_ZERO : PredOneOpPassthruPseudo; + def _D_ZERO : PredOneOpPassthruPseudo; - def : SVE_1_Op_PassthruZero_Pat(NAME # _ZERO_H)>; - def : SVE_1_Op_PassthruZero_Pat(NAME # _ZERO_S)>; - def : SVE_1_Op_PassthruZero_Pat(NAME # _ZERO_D)>; + def : SVE_1_Op_PassthruZero_Pat(NAME # _H_ZERO)>; + def : SVE_1_Op_PassthruZero_Pat(NAME # _S_ZERO)>; + def : SVE_1_Op_PassthruZero_Pat(NAME # _D_ZERO)>; } multiclass sve2_fp_convert_down_odd_rounding { @@ -3225,16 +3225,16 @@ //class for generating pseudo for SVE MLA/MAD/MLS/MSB multiclass sve_int_3op_p_mladdsub { - def _UNDEF_B : PredThreeOpPseudo; - def _UNDEF_H : PredThreeOpPseudo; - def _UNDEF_S : 
PredThreeOpPseudo; - def _UNDEF_D : PredThreeOpPseudo; + def _B_UNDEF : PredThreeOpPseudo; + def _H_UNDEF : PredThreeOpPseudo; + def _S_UNDEF : PredThreeOpPseudo; + def _D_UNDEF : PredThreeOpPseudo; let AddedComplexity = 9 in { - def : SVE_4_Op_Pat(NAME # _UNDEF_B)>; - def : SVE_4_Op_Pat(NAME # _UNDEF_H)>; - def : SVE_4_Op_Pat(NAME # _UNDEF_S)>; - def : SVE_4_Op_Pat(NAME # _UNDEF_D)>; + def : SVE_4_Op_Pat(NAME # _B_UNDEF)>; + def : SVE_4_Op_Pat(NAME # _H_UNDEF)>; + def : SVE_4_Op_Pat(NAME # _S_UNDEF)>; + def : SVE_4_Op_Pat(NAME # _D_UNDEF)>; } } @@ -3805,9 +3805,9 @@ def : SVE_3_Op_Pat(NAME # _S)>; - def _UNDEF_S : PredOneOpPassthruPseudo; + def _S_UNDEF : PredOneOpPassthruPseudo; - defm : SVE_3_Op_Undef_Pat(NAME # _UNDEF_S)>; + defm : SVE_3_Op_Undef_Pat(NAME # _S_UNDEF)>; } multiclass sve2_int_un_pred_arit opc, string asm, SDPatternOperator op> { @@ -3825,15 +3825,15 @@ def : SVE_3_Op_Pat(NAME # _S)>; def : SVE_3_Op_Pat(NAME # _D)>; - def _UNDEF_B : PredOneOpPassthruPseudo; - def _UNDEF_H : PredOneOpPassthruPseudo; - def _UNDEF_S : PredOneOpPassthruPseudo; - def _UNDEF_D : PredOneOpPassthruPseudo; + def _B_UNDEF : PredOneOpPassthruPseudo; + def _H_UNDEF : PredOneOpPassthruPseudo; + def _S_UNDEF : PredOneOpPassthruPseudo; + def _D_UNDEF : PredOneOpPassthruPseudo; - defm : SVE_3_Op_Undef_Pat(NAME # _UNDEF_B)>; - defm : SVE_3_Op_Undef_Pat(NAME # _UNDEF_H)>; - defm : SVE_3_Op_Undef_Pat(NAME # _UNDEF_S)>; - defm : SVE_3_Op_Undef_Pat(NAME # _UNDEF_D)>; + defm : SVE_3_Op_Undef_Pat(NAME # _B_UNDEF)>; + defm : SVE_3_Op_Undef_Pat(NAME # _H_UNDEF)>; + defm : SVE_3_Op_Undef_Pat(NAME # _S_UNDEF)>; + defm : SVE_3_Op_Undef_Pat(NAME # _D_UNDEF)>; } //===----------------------------------------------------------------------===// @@ -4481,15 +4481,15 @@ def : SVE_1_Op_Passthru_Pat(NAME # _S)>; def : SVE_1_Op_Passthru_Pat(NAME # _D)>; - def _UNDEF_B : PredOneOpPassthruPseudo; - def _UNDEF_H : PredOneOpPassthruPseudo; - def _UNDEF_S : PredOneOpPassthruPseudo; - def _UNDEF_D : 
PredOneOpPassthruPseudo; + def _B_UNDEF : PredOneOpPassthruPseudo; + def _H_UNDEF : PredOneOpPassthruPseudo; + def _S_UNDEF : PredOneOpPassthruPseudo; + def _D_UNDEF : PredOneOpPassthruPseudo; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_B)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_H)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_S)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_D)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _B_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _H_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _S_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _D_UNDEF)>; } multiclass sve_int_un_pred_arit_0_h opc, string asm, @@ -4505,13 +4505,13 @@ def : SVE_InReg_Extend(NAME # _S)>; def : SVE_InReg_Extend(NAME # _D)>; - def _UNDEF_H : PredOneOpPassthruPseudo; - def _UNDEF_S : PredOneOpPassthruPseudo; - def _UNDEF_D : PredOneOpPassthruPseudo; + def _H_UNDEF : PredOneOpPassthruPseudo; + def _S_UNDEF : PredOneOpPassthruPseudo; + def _D_UNDEF : PredOneOpPassthruPseudo; - defm : SVE_InReg_Extend_PassthruUndef(NAME # _UNDEF_H)>; - defm : SVE_InReg_Extend_PassthruUndef(NAME # _UNDEF_S)>; - defm : SVE_InReg_Extend_PassthruUndef(NAME # _UNDEF_D)>; + defm : SVE_InReg_Extend_PassthruUndef(NAME # _H_UNDEF)>; + defm : SVE_InReg_Extend_PassthruUndef(NAME # _S_UNDEF)>; + defm : SVE_InReg_Extend_PassthruUndef(NAME # _D_UNDEF)>; } multiclass sve_int_un_pred_arit_0_w opc, string asm, @@ -4524,11 +4524,11 @@ def : SVE_InReg_Extend(NAME # _S)>; def : SVE_InReg_Extend(NAME # _D)>; - def _UNDEF_S : PredOneOpPassthruPseudo; - def _UNDEF_D : PredOneOpPassthruPseudo; + def _S_UNDEF : PredOneOpPassthruPseudo; + def _D_UNDEF : PredOneOpPassthruPseudo; - defm : SVE_InReg_Extend_PassthruUndef(NAME # _UNDEF_S)>; - defm : SVE_InReg_Extend_PassthruUndef(NAME # _UNDEF_D)>; + defm : SVE_InReg_Extend_PassthruUndef(NAME # _S_UNDEF)>; + defm : SVE_InReg_Extend_PassthruUndef(NAME # _D_UNDEF)>; } multiclass sve_int_un_pred_arit_0_d opc, string asm, 
@@ -4538,9 +4538,9 @@ def : SVE_InReg_Extend(NAME # _D)>; - def _UNDEF_D : PredOneOpPassthruPseudo; + def _D_UNDEF : PredOneOpPassthruPseudo; - defm : SVE_InReg_Extend_PassthruUndef(NAME # _UNDEF_D)>; + defm : SVE_InReg_Extend_PassthruUndef(NAME # _D_UNDEF)>; } multiclass sve_int_un_pred_arit_1 opc, string asm, @@ -4559,15 +4559,15 @@ def : SVE_1_Op_Passthru_Pat(NAME # _S)>; def : SVE_1_Op_Passthru_Pat(NAME # _D)>; - def _UNDEF_B : PredOneOpPassthruPseudo; - def _UNDEF_H : PredOneOpPassthruPseudo; - def _UNDEF_S : PredOneOpPassthruPseudo; - def _UNDEF_D : PredOneOpPassthruPseudo; + def _B_UNDEF : PredOneOpPassthruPseudo; + def _H_UNDEF : PredOneOpPassthruPseudo; + def _S_UNDEF : PredOneOpPassthruPseudo; + def _D_UNDEF : PredOneOpPassthruPseudo; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_B)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_H)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_S)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_D)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _B_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _H_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _S_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _D_UNDEF)>; } multiclass sve_int_un_pred_arit_1_fp opc, string asm, SDPatternOperator op> { @@ -4585,16 +4585,16 @@ def : SVE_1_Op_Passthru_Pat(NAME # _S)>; def : SVE_1_Op_Passthru_Pat(NAME # _D)>; - def _UNDEF_H : PredOneOpPassthruPseudo; - def _UNDEF_S : PredOneOpPassthruPseudo; - def _UNDEF_D : PredOneOpPassthruPseudo; + def _H_UNDEF : PredOneOpPassthruPseudo; + def _S_UNDEF : PredOneOpPassthruPseudo; + def _D_UNDEF : PredOneOpPassthruPseudo; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_H)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_H)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_H)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_S)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_S)>; - defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_D)>; + defm : 
SVE_1_Op_PassthruUndef_Pat(NAME # _H_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _H_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _H_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _S_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _S_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _D_UNDEF)>; } //===----------------------------------------------------------------------===// @@ -5820,15 +5820,15 @@ } multiclass sve_int_bin_pred_shift_imm_left_zeroing_bhsd { - def _ZERO_B : PredTwoOpImmPseudo; - def _ZERO_H : PredTwoOpImmPseudo; - def _ZERO_S : PredTwoOpImmPseudo; - def _ZERO_D : PredTwoOpImmPseudo; + def _B_ZERO : PredTwoOpImmPseudo; + def _H_ZERO : PredTwoOpImmPseudo; + def _S_ZERO : PredTwoOpImmPseudo; + def _D_ZERO : PredTwoOpImmPseudo; - def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _ZERO_B)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _ZERO_H)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _ZERO_S)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _ZERO_D)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _B_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _H_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _S_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _D_ZERO)>; } multiclass sve_int_bin_pred_shift_imm_right opc, string asm, string Ps, @@ -5866,15 +5866,15 @@ } multiclass sve_int_bin_pred_shift_imm_right_zeroing_bhsd { - def _ZERO_B : PredTwoOpImmPseudo; - def _ZERO_H : PredTwoOpImmPseudo; - def _ZERO_S : PredTwoOpImmPseudo; - def _ZERO_D : PredTwoOpImmPseudo; + def _B_ZERO : PredTwoOpImmPseudo; + def _H_ZERO : PredTwoOpImmPseudo; + def _S_ZERO : PredTwoOpImmPseudo; + def _D_ZERO : PredTwoOpImmPseudo; - def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _ZERO_B)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _ZERO_H)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _ZERO_S)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _ZERO_D)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _B_ZERO)>; + def : 
SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _H_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _S_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _D_ZERO)>; } class sve_int_bin_pred_shift sz8_64, bit wide, bits<3> opc, @@ -5921,29 +5921,29 @@ } multiclass sve_int_bin_pred_zeroing_bhsd { - def _ZERO_B : PredTwoOpPseudo; - def _ZERO_H : PredTwoOpPseudo; - def _ZERO_S : PredTwoOpPseudo; - def _ZERO_D : PredTwoOpPseudo; + def _B_ZERO : PredTwoOpPseudo; + def _H_ZERO : PredTwoOpPseudo; + def _S_ZERO : PredTwoOpPseudo; + def _D_ZERO : PredTwoOpPseudo; - def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_B)>; - def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_H)>; - def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_S)>; - def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_D)>; + def : SVE_3_Op_Pat_SelZero(NAME # _B_ZERO)>; + def : SVE_3_Op_Pat_SelZero(NAME # _H_ZERO)>; + def : SVE_3_Op_Pat_SelZero(NAME # _S_ZERO)>; + def : SVE_3_Op_Pat_SelZero(NAME # _D_ZERO)>; } multiclass sve_int_bin_pred_imm_zeroing_bhsd { - def _ZERO_B : PredTwoOpImmPseudo, FalseLanesZero>; - def _ZERO_H : PredTwoOpImmPseudo, FalseLanesZero>; - def _ZERO_S : PredTwoOpImmPseudo, FalseLanesZero>; - def _ZERO_D : PredTwoOpImmPseudo, FalseLanesZero>; + def _B_ZERO : PredTwoOpImmPseudo, FalseLanesZero>; + def _H_ZERO : PredTwoOpImmPseudo, FalseLanesZero>; + def _S_ZERO : PredTwoOpImmPseudo, FalseLanesZero>; + def _D_ZERO : PredTwoOpImmPseudo, FalseLanesZero>; - def : SVE_2_Op_Imm_Pat_Zero(NAME # _ZERO_B)>; - def : SVE_2_Op_Imm_Pat_Zero(NAME # _ZERO_H)>; - def : SVE_2_Op_Imm_Pat_Zero(NAME # _ZERO_S)>; - def : SVE_2_Op_Imm_Pat_Zero(NAME # _ZERO_D)>; + def : SVE_2_Op_Imm_Pat_Zero(NAME # _B_ZERO)>; + def : SVE_2_Op_Imm_Pat_Zero(NAME # _H_ZERO)>; + def : SVE_2_Op_Imm_Pat_Zero(NAME # _S_ZERO)>; + def : SVE_2_Op_Imm_Pat_Zero(NAME # _D_ZERO)>; } multiclass sve_int_bin_pred_shift_wide opc, string asm, @@ -9024,52 +9024,52 @@ // Predicated pseudo floating point two operand instructions. 
multiclass sve_fp_bin_pred_hfd { - def _UNDEF_H : PredTwoOpPseudo; - def _UNDEF_S : PredTwoOpPseudo; - def _UNDEF_D : PredTwoOpPseudo; + def _H_UNDEF : PredTwoOpPseudo; + def _S_UNDEF : PredTwoOpPseudo; + def _D_UNDEF : PredTwoOpPseudo; - def : SVE_3_Op_Pat(NAME # _UNDEF_H)>; - def : SVE_3_Op_Pat(NAME # _UNDEF_H)>; - def : SVE_3_Op_Pat(NAME # _UNDEF_H)>; - def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; - def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; - def : SVE_3_Op_Pat(NAME # _UNDEF_D)>; + def : SVE_3_Op_Pat(NAME # _H_UNDEF)>; + def : SVE_3_Op_Pat(NAME # _H_UNDEF)>; + def : SVE_3_Op_Pat(NAME # _H_UNDEF)>; + def : SVE_3_Op_Pat(NAME # _S_UNDEF)>; + def : SVE_3_Op_Pat(NAME # _S_UNDEF)>; + def : SVE_3_Op_Pat(NAME # _D_UNDEF)>; } // Predicated pseudo floating point three operand instructions. multiclass sve_fp_3op_pred_hfd { - def _UNDEF_H : PredThreeOpPseudo; - def _UNDEF_S : PredThreeOpPseudo; - def _UNDEF_D : PredThreeOpPseudo; + def _H_UNDEF : PredThreeOpPseudo; + def _S_UNDEF : PredThreeOpPseudo; + def _D_UNDEF : PredThreeOpPseudo; - def : SVE_4_Op_Pat(NAME # _UNDEF_H)>; - def : SVE_4_Op_Pat(NAME # _UNDEF_H)>; - def : SVE_4_Op_Pat(NAME # _UNDEF_H)>; - def : SVE_4_Op_Pat(NAME # _UNDEF_S)>; - def : SVE_4_Op_Pat(NAME # _UNDEF_S)>; - def : SVE_4_Op_Pat(NAME # _UNDEF_D)>; + def : SVE_4_Op_Pat(NAME # _H_UNDEF)>; + def : SVE_4_Op_Pat(NAME # _H_UNDEF)>; + def : SVE_4_Op_Pat(NAME # _H_UNDEF)>; + def : SVE_4_Op_Pat(NAME # _S_UNDEF)>; + def : SVE_4_Op_Pat(NAME # _S_UNDEF)>; + def : SVE_4_Op_Pat(NAME # _D_UNDEF)>; } // Predicated pseudo integer two operand instructions. 
multiclass sve_int_bin_pred_bhsd { - def _UNDEF_B : PredTwoOpPseudo; - def _UNDEF_H : PredTwoOpPseudo; - def _UNDEF_S : PredTwoOpPseudo; - def _UNDEF_D : PredTwoOpPseudo; + def _B_UNDEF : PredTwoOpPseudo; + def _H_UNDEF : PredTwoOpPseudo; + def _S_UNDEF : PredTwoOpPseudo; + def _D_UNDEF : PredTwoOpPseudo; - def : SVE_3_Op_Pat(NAME # _UNDEF_B)>; - def : SVE_3_Op_Pat(NAME # _UNDEF_H)>; - def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; - def : SVE_3_Op_Pat(NAME # _UNDEF_D)>; + def : SVE_3_Op_Pat(NAME # _B_UNDEF)>; + def : SVE_3_Op_Pat(NAME # _H_UNDEF)>; + def : SVE_3_Op_Pat(NAME # _S_UNDEF)>; + def : SVE_3_Op_Pat(NAME # _D_UNDEF)>; } // As sve_int_bin_pred but when only i32 and i64 vector types are required. multiclass sve_int_bin_pred_sd { - def _UNDEF_S : PredTwoOpPseudo; - def _UNDEF_D : PredTwoOpPseudo; + def _S_UNDEF : PredTwoOpPseudo; + def _D_UNDEF : PredTwoOpPseudo; - def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; - def : SVE_3_Op_Pat(NAME # _UNDEF_D)>; + def : SVE_3_Op_Pat(NAME # _S_UNDEF)>; + def : SVE_3_Op_Pat(NAME # _D_UNDEF)>; } // Predicated pseudo integer two operand instructions. 
Second operand is an @@ -9077,27 +9077,27 @@ multiclass sve_int_shift_pred_bhsd { - def _UNDEF_B : PredTwoOpImmPseudo, FalseLanesUndef>; - def _UNDEF_H : PredTwoOpImmPseudo, FalseLanesUndef>; - def _UNDEF_S : PredTwoOpImmPseudo, FalseLanesUndef>; - def _UNDEF_D : PredTwoOpImmPseudo, FalseLanesUndef>; + def _B_UNDEF : PredTwoOpImmPseudo, FalseLanesUndef>; + def _H_UNDEF : PredTwoOpImmPseudo, FalseLanesUndef>; + def _S_UNDEF : PredTwoOpImmPseudo, FalseLanesUndef>; + def _D_UNDEF : PredTwoOpImmPseudo, FalseLanesUndef>; - def : SVE_Shift_DupImm_Pred_Pat(NAME # _UNDEF_B)>; - def : SVE_Shift_DupImm_Pred_Pat(NAME # _UNDEF_H)>; - def : SVE_Shift_DupImm_Pred_Pat(NAME # _UNDEF_S)>; - def : SVE_Shift_DupImm_Pred_Pat(NAME # _UNDEF_D)>; + def : SVE_Shift_DupImm_Pred_Pat(NAME # _B_UNDEF)>; + def : SVE_Shift_DupImm_Pred_Pat(NAME # _H_UNDEF)>; + def : SVE_Shift_DupImm_Pred_Pat(NAME # _S_UNDEF)>; + def : SVE_Shift_DupImm_Pred_Pat(NAME # _D_UNDEF)>; } multiclass sve_int_bin_pred_all_active_bhsd { - def _UNDEF_B : PredTwoOpPseudo; - def _UNDEF_H : PredTwoOpPseudo; - def _UNDEF_S : PredTwoOpPseudo; - def _UNDEF_D : PredTwoOpPseudo; - - def : SVE_2_Op_Pred_All_Active_Pt(NAME # _UNDEF_B)>; - def : SVE_2_Op_Pred_All_Active_Pt(NAME # _UNDEF_H)>; - def : SVE_2_Op_Pred_All_Active_Pt(NAME # _UNDEF_S)>; - def : SVE_2_Op_Pred_All_Active_Pt(NAME # _UNDEF_D)>; + def _B_UNDEF : PredTwoOpPseudo; + def _H_UNDEF : PredTwoOpPseudo; + def _S_UNDEF : PredTwoOpPseudo; + def _D_UNDEF : PredTwoOpPseudo; + + def : SVE_2_Op_Pred_All_Active_Pt(NAME # _B_UNDEF)>; + def : SVE_2_Op_Pred_All_Active_Pt(NAME # _H_UNDEF)>; + def : SVE_2_Op_Pred_All_Active_Pt(NAME # _S_UNDEF)>; + def : SVE_2_Op_Pred_All_Active_Pt(NAME # _D_UNDEF)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.mir b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.mir --- 
a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.mir +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.mir @@ -34,6 +34,6 @@ ; CHECK-NEXT: $z0 = BIC_ZPmZ_H killed renamable $p0, internal killed $z0, internal killed renamable $z0 ; CHECK-NEXT: } ; CHECK-NEXT: RET undef $lr, implicit $z0 - renamable $z0 = BIC_ZPZZ_ZERO_H killed renamable $p0, killed renamable $z0, killed renamable $z0 + renamable $z0 = BIC_ZPZZ_H_ZERO killed renamable $p0, killed renamable $z0, killed renamable $z0 RET_ReallyLR implicit $z0 ... diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-binaryComm-merging.mir b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-binaryComm-merging.mir --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-binaryComm-merging.mir +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-binaryComm-merging.mir @@ -34,6 +34,6 @@ ; CHECK-NEXT: $z0 = FMUL_ZPmZ_S renamable $p0, internal killed $z0, internal killed renamable $z0 ; CHECK-NEXT: } ; CHECK-NEXT: RET undef $lr, implicit $z0 - renamable $z0 = nnan ninf nsz arcp contract afn reassoc FMUL_ZPZZ_ZERO_S renamable $p0, killed renamable $z0, renamable $z0 + renamable $z0 = nnan ninf nsz arcp contract afn reassoc FMUL_ZPZZ_S_ZERO renamable $p0, killed renamable $z0, renamable $z0 RET_ReallyLR implicit $z0 ... 
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-binaryCommWithRev-merging.mir b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-binaryCommWithRev-merging.mir --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-binaryCommWithRev-merging.mir +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-binaryCommWithRev-merging.mir @@ -36,6 +36,6 @@ ; CHECK-NEXT: $z0 = FSUBR_ZPmZ_S renamable $p0, internal killed $z0, internal killed renamable $z0 ; CHECK-NEXT: } ; CHECK-NEXT: RET undef $lr, implicit $z0 - renamable $z0 = nnan ninf nsz arcp contract afn reassoc FSUB_ZPZZ_ZERO_S renamable $p0, killed renamable $z0, renamable $z0 + renamable $z0 = nnan ninf nsz arcp contract afn reassoc FSUB_ZPZZ_S_ZERO renamable $p0, killed renamable $z0, renamable $z0 RET_ReallyLR implicit $z0 ... diff --git a/llvm/test/CodeGen/AArch64/sve-pseudos-expand-undef.mir b/llvm/test/CodeGen/AArch64/sve-pseudos-expand-undef.mir --- a/llvm/test/CodeGen/AArch64/sve-pseudos-expand-undef.mir +++ b/llvm/test/CodeGen/AArch64/sve-pseudos-expand-undef.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 # RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -run-pass=aarch64-expand-pseudo -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s --- name: add_x @@ -12,11 +13,12 @@ bb.0: liveins: $p0, $z0 - ; CHECK: add_x - ; CHECK-NOT: MOVPRFX - ; CHECK: $z0 = FADD_ZPmZ_S renamable $p0, killed $z0, renamable $z0 - ; CHECK-NEXT: RET - renamable $z0 = FADD_ZPZZ_UNDEF_S renamable $p0, renamable $z0, killed renamable $z0 + ; CHECK-LABEL: name: add_x + ; CHECK: liveins: $p0, $z0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $z0 = FADD_ZPmZ_S renamable $p0, killed $z0, renamable $z0 + ; CHECK-NEXT: RET undef $lr + renamable $z0 = FADD_ZPZZ_S_UNDEF renamable $p0, renamable $z0, killed renamable $z0 RET_ReallyLR ... 
@@ -26,8 +28,12 @@ name: expand_mls_to_msb body: | bb.0: + ; CHECK-LABEL: name: expand_mls_to_msb + ; CHECK: renamable $p0 = PTRUE_B 31 + ; CHECK-NEXT: $z0 = MSB_ZPmZZ_B killed renamable $p0, killed $z0, killed renamable $z1, killed renamable $z2 + ; CHECK-NEXT: RET undef $lr, implicit $z0 renamable $p0 = PTRUE_B 31 - renamable $z0 = MLS_ZPZZZ_UNDEF_B killed renamable $p0, killed renamable $z2, killed renamable $z0, killed renamable $z1 + renamable $z0 = MLS_ZPZZZ_B_UNDEF killed renamable $p0, killed renamable $z2, killed renamable $z0, killed renamable $z1 RET_ReallyLR implicit $z0 ... @@ -36,7 +42,11 @@ name: expand_mla_to_mad body: | bb.0: + ; CHECK-LABEL: name: expand_mla_to_mad + ; CHECK: renamable $p0 = PTRUE_B 31 + ; CHECK-NEXT: $z0 = MAD_ZPmZZ_B killed renamable $p0, killed $z0, killed renamable $z1, killed renamable $z2 + ; CHECK-NEXT: RET undef $lr, implicit $z0 renamable $p0 = PTRUE_B 31 - renamable $z0 = MLA_ZPZZZ_UNDEF_B killed renamable $p0, killed renamable $z2, killed renamable $z0, killed renamable $z1 + renamable $z0 = MLA_ZPZZZ_B_UNDEF killed renamable $p0, killed renamable $z2, killed renamable $z0, killed renamable $z1 RET_ReallyLR implicit $z0 ...