diff --git a/llvm/lib/Target/AArch64/AArch64SchedA510.td b/llvm/lib/Target/AArch64/AArch64SchedA510.td --- a/llvm/lib/Target/AArch64/AArch64SchedA510.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA510.td @@ -554,196 +554,200 @@ // Loop control, based on GPR def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], - (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>; + (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]")>; -def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^WHILE(RW|WR)_PXX_[BHSD]$")>; +def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^WHILE(RW|WR)_PXX_[BHSD]")>; // Loop terminate -def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>; +def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], (instregex "^CTERM(EQ|NE)_(WW|XX)")>; // Predicate counting scalar def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>; def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], - (instregex "^CNT[BHWD]_XPiI$")>; + (instregex "^CNT[BHWD]_XPiI")>; def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], - (instregex "^(INC|DEC)[BHWD]_XPiI$")>; + (instregex "^(INC|DEC)[BHWD]_XPiI")>; def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], - (instregex "^(SQINC|SQDEC|UQINC|UQDEC)[BHWD]_[XW]Pi(Wd)?I$")>; + (instregex "^(SQINC|SQDEC|UQINC|UQDEC)[BHWD]_[XW]Pi(Wd)?I")>; // Predicate counting scalar, active predicate def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], - (instregex "^CNTP_XPP_[BHSD]$")>; + (instregex "^CNTP_XPP_[BHSD]")>; def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], - (instregex "^(DEC|INC)P_XP_[BHSD]$")>; + (instregex "^(DEC|INC)P_XP_[BHSD]")>; def : InstRW<[CortexA510Write<8, CortexA510UnitVALU0>], - (instregex "^(SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]$", - "^(UQDEC|UQINC)P_WP_[BHSD]$", - "^(SQDEC|SQINC|UQDEC|UQINC)P_XPWd_[BHSD]$")>; + (instregex "^(SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]", + 
"^(UQDEC|UQINC)P_WP_[BHSD]", + "^(SQDEC|SQINC|UQDEC|UQINC)P_XPWd_[BHSD]")>; // Predicate counting vector, active predicate def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], - (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]$")>; + (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]")>; // Predicate logical def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], - (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>; + (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP")>; // Predicate logical, flag setting def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], - (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP$")>; + (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP")>; // Predicate reverse -def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^REV_PP_[BHSD]$")>; +def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^REV_PP_[BHSD]")>; // Predicate select def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs SEL_PPPP)>; // Predicate set -def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PFALSE$", "^PTRUE_[BHSD]$")>; +def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PFALSE", "^PTRUE_[BHSD]")>; // Predicate set/initialize, set flags -def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PTRUES_[BHSD]$")>; +def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PTRUES_[BHSD]")>; // Predicate find first/next -def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PFIRST_B$", "^PNEXT_[BHSD]$")>; +def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PFIRST_B", "^PNEXT_[BHSD]")>; // Predicate test def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs PTEST_PP)>; // Predicate transpose -def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^TRN[12]_PPP_[BHSDQ]$")>; +def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^TRN[12]_PPP_[BHSDQ]")>; // Predicate unpack and 
widen def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs PUNPKHI_PP, PUNPKLO_PP)>; // Predicate zip/unzip -def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>; +def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]")>; // SVE integer instructions // ----------------------------------------------------------------------------- // Arithmetic, absolute diff -def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU]ABD_ZPmZ_[BHSD]$")>; +def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU]ABD_(ZPmZ|ZPZZ)_[BHSD]")>; // Arithmetic, absolute diff accum -def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "^[SU]ABA_ZZZ_[BHSD]$")>; +def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "^[SU]ABA_ZZZ_[BHSD]")>; // Arithmetic, absolute diff accum long -def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]$")>; +def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]")>; // Arithmetic, absolute diff long -def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]$")>; +def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]")>; // Arithmetic, basic def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], - (instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]$", - "^(ADD|SUB|SUBR)_ZPmZ_[BHSD]$", - "^(ADD|SUB)_ZZZ_[BHSD]$", - "^(ADD|SUB|SUBR)_ZI_[BHSD]$", - "^ADR_[SU]XTW_ZZZ_D_[0123]$", - "^ADR_LSL_ZZZ_[SD]_[0123]$", - "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]$", - "^SADDLBT_ZZZ_[HSD]$", - "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]$", - "^SSUBL(BT|TB)_ZZZ_[HSD]$")>; + (instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]", + "^(ADD|SUB|SUBR)_ZPmZ_[BHSD]", + "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]", + "^(ADD|SUB)_ZZZ_[BHSD]", + "^(ADD|SUB|SUBR)_ZI_[BHSD]", + "^ADR_[SU]XTW_ZZZ_D_[0123]", + "^ADR_LSL_ZZZ_[SD]_[0123]", + 
"^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]", + "^SADDLBT_ZZZ_[HSD]", + "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]", + "^SSUBL(BT|TB)_ZZZ_[HSD]")>; // Arithmetic, complex def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], - (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]$", - "^SQ(ABS|NEG)_ZPmZ_[BHSD]$", - "^SQ(ADD|SUB|SUBR)_ZPmZ_?[BHSD]$", - "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]$", - "^[SU]Q(ADD|SUB)_ZI_[BHSD]$", - "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]$", - "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]$")>; + (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]", + "^SQ(ABS|NEG)_ZPmZ_[BHSD]", + "^SQ(ADD|SUB|SUBR)_ZPmZ_?[BHSD]", + "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]", + "^[SU]Q(ADD|SUB)_ZI_[BHSD]", + "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]", + "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>; // Arithmetic, large integer -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>; // Arithmetic, pairwise add -def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^ADDP_ZPmZ_[BHSD]$")>; +def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^ADDP_ZPmZ_[BHSD]")>; // Arithmetic, pairwise add and accum long -def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>], (instregex "^[SU]ADALP_ZPmZ_[HSD]$")>; +def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>], (instregex "^[SU]ADALP_ZPmZ_[HSD]")>; // Arithmetic, shift def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], - (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]$", - "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]$", - "^(ASR|LSL|LSR)_ZPmI_[BHSD]$", - "^(ASR|LSL|LSR)_ZPmZ_[BHSD]$", - "^(ASR|LSL|LSR)_ZZI_[BHSD]$", - "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]$")>; + (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]", + "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]", + "^(ASR|LSL|LSR)_ZPmI_[BHSD]", + "^(ASR|LSL|LSR)_ZPZI_[BHSD]", + "^(ASR|LSL|LSR)_ZPmZ_[BHSD]", + "^(ASR|LSL|LSR)_ZPZZ_[BHSD]", + "^(ASR|LSL|LSR)_ZZI_[BHSD]", + "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>; +// Arithmetic, shift right for 
divide +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], + (instregex "^ASRD_ZPmI_[BHSD]", + "^ASRD_ZPZI_[BHSD]")>; // Arithmetic, shift and accumulate def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], - (instregex "^(SSRA|USRA)_ZZI_[BHSD]$")>; + (instregex "^(SSRA|USRA)_ZZI_[BHSD]")>; def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>], - (instregex "^(SRSRA|URSRA)_ZZI_[BHSD]$")>; + (instregex "^(SRSRA|URSRA)_ZZI_[BHSD]")>; // Arithmetic, shift by immediate // Arithmetic, shift by immediate and insert def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], - (instregex "^(SHRNB|SHRNT|SSHLLB|SSHLLT|USHLLB|USHLLT|SLI|SRI)_ZZI_[BHSD]$")>; + (instregex "^(SHRNB|SHRNT|SSHLLB|SSHLLT|USHLLB|USHLLT|SLI|SRI)_ZZI_[BHSD]")>; // Arithmetic, shift complex def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], - (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]$", - "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]$", - "^(SQSHL|SQSHLU|UQSHL)_ZPmI_[BHSD]$", - "^SQSHRU?N[BT]_ZZI_[BHS]$", - "^UQR?SHRN[BT]_ZZI_[BHS]$")>; - -// Arithmetic, shift right for divide -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^ASRD_ZPmI_[BHSD]$")>; + (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]", + "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_(ZPmZ|ZPZZ)_[BHSD]", + "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]", + "^SQSHRU?N[BT]_ZZI_[BHS]", + "^UQR?SHRN[BT]_ZZI_[BHS]")>; // Arithmetic, shift rounding def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], - (instregex "^(SRSHL|SRSHLR|URSHL|URSHLR)_ZPmZ_[BHSD]$", - "^[SU]RSHR_ZPmI_[BHSD]$")>; + (instregex "^(SRSHL|SRSHR|SRSHLR|URSHL|URSHLR|URSHR)_(ZPmZ|ZPZZ|ZPZI)_[BHSD]", + "^[SU]RSHR_ZPmI_[BHSD]")>; // Bit manipulation def : InstRW<[CortexA510MCWrite<14, 13, CortexA510UnitVMC>], - (instregex "^(BDEP|BEXT|BGRP)_ZZZ_B$")>; + (instregex "^(BDEP|BEXT|BGRP)_ZZZ_B")>; def : InstRW<[CortexA510MCWrite<22, 21, CortexA510UnitVMC>], - (instregex "^(BDEP|BEXT|BGRP)_ZZZ_H$")>; + (instregex 
"^(BDEP|BEXT|BGRP)_ZZZ_H")>; def : InstRW<[CortexA510MCWrite<38, 37, CortexA510UnitVMC>], - (instregex "^(BDEP|BEXT|BGRP)_ZZZ_S$")>; + (instregex "^(BDEP|BEXT|BGRP)_ZZZ_S")>; def : InstRW<[CortexA510MCWrite<70, 69, CortexA510UnitVMC>], - (instregex "^(BDEP|BEXT|BGRP)_ZZZ_D$")>; + (instregex "^(BDEP|BEXT|BGRP)_ZZZ_D")>; // Bitwise select -def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ$")>; +def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ")>; // Count/reverse bits -def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^(CLS|CLZ|RBIT)_ZPmZ_[BHSD]$")>; -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_[BH]$")>; -def : InstRW<[CortexA510Write<8, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_S$")>; -def : InstRW<[CortexA510Write<12, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_D$")>; +def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^(CLS|CLZ|RBIT)_ZPmZ_[BHSD]")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_[BH]")>; +def : InstRW<[CortexA510Write<8, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_S")>; +def : InstRW<[CortexA510Write<12, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_D")>; // Broadcast logical bitmask immediate to vector def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instrs DUPM_ZI)>; // Compare and set flags def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], - (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$", - "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>; + (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]", + "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>; // Complex add -def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^CADD_ZZI_[BHSD]$")>; +def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^CADD_ZZI_[BHSD]")>; -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], 
(instregex "^SQCADD_ZZI_[BHSD]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^SQCADD_ZZI_[BHSD]")>; // Complex dot product 8-bit element def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>; @@ -752,19 +756,19 @@ def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>; // Complex multiply-add B, H, S element size -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^CMLA_ZZZ_[BHS]$", - "^CMLA_ZZZI_[HS]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^CMLA_ZZZ_[BHS]", + "^CMLA_ZZZI_[HS]")>; // Complex multiply-add D element size def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs CMLA_ZZZ_D)>; // Conditional extract operations, scalar form -def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>; +def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "^CLAST[AB]_RPZ_[BHSD]")>; // Conditional extract operations, SIMD&FP scalar and vector forms -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$", - "^COMPACT_ZPZ_[SD]$", - "^SPLICE_ZPZZ?_[BHSD]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]", + "^COMPACT_ZPZ_[SD]", + "^SPLICE_ZPZZ?_[BHSD]")>; // Convert to floating point, 64b to float or convert to double def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]CVTF_ZPmZ_Dto[SD]")>; @@ -782,165 +786,166 @@ def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]CVTF_ZPmZ_HtoH")>; // Copy, scalar -def : InstRW<[CortexA510Write<3, CortexA510UnitVALU0>],(instregex "^CPY_ZPmR_[BHSD]$")>; +def : InstRW<[CortexA510Write<3, CortexA510UnitVALU0>],(instregex "^CPY_ZPmR_[BHSD]")>; // Copy, scalar SIMD&FP or imm -def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^CPY_ZPm[IV]_[BHSD]$", - "^CPY_ZPzI_[BHSD]$")>; +def : 
InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^CPY_ZPm[IV]_[BHSD]", + "^CPY_ZPzI_[BHSD]")>; // Divides, 32 bit -def : InstRW<[CortexA510MCWrite<15, 12, CortexA510UnitVMC>], (instregex "^[SU]DIVR?_ZPmZ_S$")>; +def : InstRW<[CortexA510MCWrite<15, 12, CortexA510UnitVMC>], (instregex "^[SU]DIVR?_(ZPmZ|ZPZZ)_S")>; // Divides, 64 bit -def : InstRW<[CortexA510MCWrite<26, 23, CortexA510UnitVMC>], (instregex "^[SU]DIVR?_ZPmZ_D$")>; +def : InstRW<[CortexA510MCWrite<26, 23, CortexA510UnitVMC>], (instregex "^[SU]DIVR?_(ZPmZ|ZPZZ)_D")>; // Dot product, 8 bit -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_S$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_S")>; // Dot product, 8 bit, using signed and unsigned integers def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>; // Dot product, 16 bit -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_D$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_D")>; // Duplicate, immediate and indexed form -def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^DUP_ZI_[BHSD]$", - "^DUP_ZZI_[BHSDQ]$")>; +def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^DUP_ZI_[BHSD]", + "^DUP_ZZI_[BHSDQ]")>; // Duplicate, scalar form -def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^DUP_ZR_[BHSD]$")>; +def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^DUP_ZR_[BHSD]")>; // Extend, sign or zero -def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU]XTB_ZPmZ_[HSD]$", - "^[SU]XTH_ZPmZ_[SD]$", - "^[SU]XTW_ZPmZ_[D]$")>; +def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU]XTB_ZPmZ_[HSD]", + "^[SU]XTH_ZPmZ_[SD]", + "^[SU]XTW_ZPmZ_[D]")>; // Extract def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instrs EXT_ZZI, EXT_ZZI_B)>; // Extract narrow saturating 
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]$", - "^SQXTUN[BT]_ZZ_[BHS]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]", + "^SQXTUN[BT]_ZZ_[BHS]")>; // Extract/insert operation, SIMD and FP scalar form -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^LAST[AB]_VPZ_[BHSD]$", - "^INSR_ZV_[BHSD]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^LAST[AB]_VPZ_[BHSD]", + "^INSR_ZV_[BHSD]")>; // Extract/insert operation, scalar -def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU0>], (instregex "^LAST[AB]_RPZ_[BHSD]$", - "^INSR_ZR_[BHSD]$")>; +def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU0>], (instregex "^LAST[AB]_RPZ_[BHSD]", + "^INSR_ZR_[BHSD]")>; // Histogram operations -def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU0>], (instregex "^HISTCNT_ZPzZZ_[SD]$", - "^HISTSEG_ZZZ$")>; +def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU0>], (instregex "^HISTCNT_ZPzZZ_[SD]", + "^HISTSEG_ZZZ")>; // Horizontal operations, B, H, S form, immediate operands only -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^INDEX_II_[BHS]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^INDEX_II_[BHS]")>; // Horizontal operations, B, H, S form, scalar, immediate operands/ scalar // operands only / immediate, scalar operands -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^INDEX_(IR|RI|RR)_[BHS]")>; // Horizontal operations, D form, immediate operands only def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs INDEX_II_D)>; // Horizontal operations, D form, scalar, immediate operands)/ scalar operands // only / immediate, scalar operands -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^INDEX_(IR|RI|RR)_D$")>; +def : InstRW<[CortexA510Write<4, 
CortexA510UnitVMAC>], (instregex "^INDEX_(IR|RI|RR)_D")>; // Logical def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], - (instregex "^(AND|EOR|ORR)_ZI$", - "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZZZ$", - "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$")>; + (instregex "^(AND|EOR|ORR)_ZI", + "^(AND|BIC|EOR|ORR)_ZZZ", + "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]", + "^(AND|BIC|EOR|NOT|ORR)_ZPZZ_[BHSD]")>; def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], - (instregex "^EOR(BT|TB)_ZZZ_[BHSD]$")>; + (instregex "^EOR(BT|TB)_ZZZ_[BHSD]")>; // Max/min, basic and pairwise -def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]$", - "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]$")>; +def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]", + "^[SU](MAX|MIN)P?_(ZPmZ|ZPZZ)_[BHSD]")>; // Matching operations -def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>], (instregex "^N?MATCH_PPzZZ_[BH]$")>; +def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>], (instregex "^N?MATCH_PPzZZ_[BH]")>; // Matrix multiply-accumulate def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>; // Move prefix -def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$", - "^MOVPRFX_ZZ$")>; +def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]", + "^MOVPRFX_ZZ")>; // Multiply, B, H, S element size -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]$", - "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ|ZPZZ)_[BHS]", + "^[SU]MULH_(ZPmZ|ZZZ|ZPZZ)_[BHS]")>; // Multiply, D element size -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D$", - "^[SU]MULH_(ZPmZ|ZZZ)_D$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ|ZPZZ)_D", +
"^[SU]MULH_(ZPmZ|ZZZ|ZPZZ)_D")>; // Multiply long -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]MULL[BT]_ZZZI_[SD]$", - "^[SU]MULL[BT]_ZZZ_[HSD]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]MULL[BT]_ZZZI_[SD]", + "^[SU]MULL[BT]_ZZZ_[HSD]")>; // Multiply accumulate, B, H, S element size -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^ML[AS]_ZZZI_[BHS]$", - "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^ML[AS]_(ZZZI|ZPZZZ)_[BHS]", + "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>; // Multiply accumulate, D element size -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^ML[AS]_ZZZI_D$", - "^(ML[AS]|MAD|MSB)_ZPmZZ_D$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^ML[AS]_(ZZZI|ZPZZZ)_D", + "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>; // Multiply accumulate long -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]$", - "^[SU]ML[AS]L[BT]_ZZZI_[SD]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]", + "^[SU]ML[AS]L[BT]_ZZZI_[SD]")>; // Multiply accumulate saturating doubling long regular -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQDML[AS](LB|LT|LBT)_ZZZ_[HSD]$", - "^SQDML[AS](LB|LT)_ZZZI_[SD]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQDML[AS](LB|LT|LBT)_ZZZ_[HSD]", + "^SQDML[AS](LB|LT)_ZZZI_[SD]")>; // Multiply saturating doubling high, B, H, S element size -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQDMULH_ZZZ_[BHS]$", - "^SQDMULH_ZZZI_[HS]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQDMULH_ZZZ_[BHS]", + "^SQDMULH_ZZZI_[HS]")>; // Multiply saturating doubling high, D element size def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>; // Multiply saturating doubling long -def 
: InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQDMULL[BT]_ZZZ_[HSD]$", - "^SQDMULL[BT]_ZZZI_[SD]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQDMULL[BT]_ZZZ_[HSD]", + "^SQDMULL[BT]_ZZZI_[SD]")>; // Multiply saturating rounding doubling regular/complex accumulate, B, H, S // element size -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDML[AS]H_ZZZ_[BHS]$", - "^SQRDCMLAH_ZZZ_[BHS]$", - "^SQRDML[AS]H_ZZZI_[HS]$", - "^SQRDCMLAH_ZZZI_[HS]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDML[AS]H_ZZZ_[BHS]", + "^SQRDCMLAH_ZZZ_[BHS]", + "^SQRDML[AS]H_ZZZI_[HS]", + "^SQRDCMLAH_ZZZI_[HS]")>; // Multiply saturating rounding doubling regular/complex accumulate, D element // size -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDML[AS]H_ZZZI?_D$", - "^SQRDCMLAH_ZZZ_D$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDML[AS]H_ZZZI?_D", + "^SQRDCMLAH_ZZZ_D")>; // Multiply saturating rounding doubling regular/complex, B, H, S element size -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDMULH_ZZZ_[BHS]$", - "^SQRDMULH_ZZZI_[HS]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDMULH_ZZZ_[BHS]", + "^SQRDMULH_ZZZI_[HS]")>; // Multiply saturating rounding doubling regular/complex, D element size -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDMULH_ZZZI?_D$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDMULH_ZZZI?_D")>; // Multiply/multiply long, (8x8) polynomial -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^PMUL_ZZZ_B$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^PMUL_ZZZ_B")>; -def : InstRW<[CortexA510Write<6, CortexA510UnitVMC>], (instregex "^PMULL[BT]_ZZZ_[HDQ]$")>; +def : InstRW<[CortexA510Write<6, CortexA510UnitVMC>], (instregex "^PMULL[BT]_ZZZ_[HDQ]")>; // Predicate counting 
vector def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], - (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI$")>; + (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI")>; // Reciprocal estimate -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs URECPE_ZPmZ_S, URSQRTE_ZPmZ_S)>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>; // Reduction, arithmetic, B form def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>; @@ -955,43 +960,44 @@ def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>; // Reduction, logical -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], (instregex "^(ANDV|EORV|ORV)_VPZ_[BHSD]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], (instregex "^(ANDV|EORV|ORV)_VPZ_[BHSD]")>; // Reverse, vector -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^REV_ZZ_[BHSD]$", - "^REVB_ZPmZ_[HSD]$", - "^REVH_ZPmZ_[SD]$", - "^REVW_ZPmZ_D$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^REV_ZZ_[BHSD]", + "^REVB_ZPmZ_[HSD]", + "^REVH_ZPmZ_[SD]", + "^REVW_ZPmZ_D")>; // Select, vector form -def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^SEL_ZPZZ_[BHSD]$")>; +def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^SEL_ZPZZ_[BHSD]")>; // Table lookup -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^TBL_ZZZZ?_[BHSD]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^TBL_ZZZZ?_[BHSD]")>; // Table lookup extension -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^TBX_ZZZ_[BHSD]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^TBX_ZZZ_[BHSD]")>; // Transpose, vector form -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^TRN[12]_ZZZ_[BHSDQ]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex 
"^TRN[12]_ZZZ_[BHSDQ]")>; // Unpack and extend -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]")>; // Zip/unzip -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]")>; // SVE floating-point instructions // ----------------------------------------------------------------------------- // Floating point absolute value/difference -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FAB[SD]_ZPmZ_[HSD]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FAB[SD]_ZPmZ_[HSD]", + "^FAB[SD]_ZPZZ_[HSD]")>; // Floating point arithmetic -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$", - "^FADDP_ZPmZZ_[HSD]$", - "^FNEG_ZPmZ_[HSD]$", - "^FSUBR_ZPm[IZ]_[HSD]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ|ZPZI|ZPZZ)_[HSD]", + "^FADDP_ZPmZZ_[HSD]", + "^FNEG_ZPmZ_[HSD]", + "^FSUBR_(ZPm[IZ]|ZPZ[IZ])_[HSD]")>; // Floating point associative add, F16 def : InstRW<[CortexA510MCWrite<32, 29, CortexA510UnitVALU>], (instrs FADDA_VPZ_H)>; @@ -1003,17 +1009,17 @@ def : InstRW<[CortexA510MCWrite<8, 5, CortexA510UnitVALU>], (instrs FADDA_VPZ_D)>; // Floating point compare -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FACG[ET]_PPzZZ_[HSD]$", - "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]$", - "^FCM(LE|LT)_PPzZ0_[HSD]$", - "^FCMUO_PPzZZ_[HSD]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FACG[ET]_PPzZZ_[HSD]", + "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]", + "^FCM(LE|LT)_PPzZ0_[HSD]", + "^FCMUO_PPzZZ_[HSD]")>; // Floating point complex add -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FCADD_ZPmZ_[HSD]$")>; +def : 
InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FCADD_ZPmZ_[HSD]")>; // Floating point complex multiply add -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FCMLA_ZPmZZ_[HSD]$", - "^FCMLA_ZZZI_[HS]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FCMLA_ZPmZZ_[HSD]", + "^FCMLA_ZZZI_[HS]")>; // Floating point convert, long or narrow (F16 to F32 or F32 to F16) def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FCVT_ZPmZ_(HtoS|StoH)", @@ -1030,13 +1036,13 @@ def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FCVTX_ZPmZ_DtoS", "FCVTXNT_ZPmZ_DtoS")>; // Floating point base2 log, F16 -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs FLOGB_ZPmZ_H)>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>; // Floating point base2 log, F32 -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs FLOGB_ZPmZ_S)>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>; // Floating point base2 log, F64 -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs FLOGB_ZPmZ_D)>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>; // Floating point convert to integer, F16 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>; @@ -1049,93 +1055,92 @@ (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>; // Floating point copy -def : InstRW<[CortexA510Write<3, CortexA510UnitVALU0>], (instregex "^FCPY_ZPmI_[HSD]$", - "^FDUP_ZI_[HSD]$")>; +def : InstRW<[CortexA510Write<3, CortexA510UnitVALU0>], (instregex "^FCPY_ZPmI_[HSD]", + "^FDUP_ZI_[HSD]")>; // Floating point divide, F16 -def : InstRW<[CortexA510MCWrite<8, 5, CortexA510UnitVMC>], (instregex "^FDIVR?_ZPmZ_H$")>; +def : InstRW<[CortexA510MCWrite<8, 5, CortexA510UnitVMC>], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>; // Floating point divide, F32 -def : 
InstRW<[CortexA510MCWrite<13, 10, CortexA510UnitVMC>], (instregex "^FDIVR?_ZPmZ_S$")>; +def : InstRW<[CortexA510MCWrite<13, 10, CortexA510UnitVMC>], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>; // Floating point divide, F64 -def : InstRW<[CortexA510MCWrite<22, 19, CortexA510UnitVMC>], (instregex "^FDIVR?_ZPmZ_D$")>; +def : InstRW<[CortexA510MCWrite<22, 19, CortexA510UnitVMC>], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>; // Floating point min/max pairwise -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>; // Floating point min/max -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^F(MAX|MIN)(NM)?_(ZPm[IZ]|ZPZZ|ZPZI)_[HSD]")>; // Floating point multiply -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]$", - "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^(FSCALE|FMULX)_(ZPmZ|ZPZZ)_[HSD]", + "^FMUL_(ZPm[IZ]|ZZZI?|ZPZI|ZPZZ)_[HSD]")>; // Floating point multiply accumulate def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], - (instregex "^FML[AS]_(ZPmZZ|ZZZI)_[HSD]$", - "^(FMAD|FNMAD|FNML[AS]|FN?MSB)_ZPmZZ_[HSD]$")>; + (instregex "^FML[AS]_(ZPmZZ|ZZZI|ZPZZZ)_[HSD]", + "^(FMAD|FNMAD|FNML[AS]|FN?MSB)_(ZPmZZ|ZPZZZ)_[HSD]")>; // Floating point multiply add/sub accumulate long -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FML[AS]L[BT]_ZZZI?_SHH$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FML[AS]L[BT]_ZZZI?_SHH")>; // Floating point reciprocal estimate, F16 -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs FRECPE_ZZ_H, FRECPX_ZPmZ_H, - FRSQRTE_ZZ_H)>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FRECPE_ZZ_H", "^FRECPX_ZPmZ_H", 
+ "^FRSQRTE_ZZ_H")>; // Floating point reciprocal estimate, F32 -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs FRECPE_ZZ_S, FRECPX_ZPmZ_S, - FRSQRTE_ZZ_S)>; - +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FRECPE_ZZ_S", "^FRECPX_ZPmZ_S", + "^FRSQRTE_ZZ_S")>; // Floating point reciprocal estimate, F64 -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs FRECPE_ZZ_D, FRECPX_ZPmZ_D, - FRSQRTE_ZZ_D)>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>],(instregex "^FRECPE_ZZ_D", "^FRECPX_ZPmZ_D", + "^FRSQRTE_ZZ_D")>; // Floating point reciprocal step -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>; // Floating point reduction, F16 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], - (instregex "^(FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_[HSD]$")>; + (instregex "^(FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_[HSD]")>; // Floating point reduction, F32 def : InstRW<[CortexA510MCWrite<12, 11, CortexA510UnitVALU0>], - (instregex "^FADDV_VPZ_H$")>; + (instregex "^FADDV_VPZ_H")>; def : InstRW<[CortexA510MCWrite<8, 5, CortexA510UnitVALU0>], - (instregex "^FADDV_VPZ_S$")>; + (instregex "^FADDV_VPZ_S")>; def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], - (instregex "^FADDV_VPZ_D$")>; + (instregex "^FADDV_VPZ_D")>; // Floating point round to integral, F16 -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>; // Floating point round to integral, F32 -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>; // Floating point round to integral, F64 -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex 
"^FRINT[AIMNPXZ]_ZPmZ_D$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>; // Floating point square root, F16 -def : InstRW<[CortexA510MCWrite<8, 5, CortexA510UnitVMC>], (instrs FSQRT_ZPmZ_H)>; +def : InstRW<[CortexA510MCWrite<8, 5, CortexA510UnitVMC>], (instregex "^FSQRT_ZPmZ_H")>; // Floating point square root, F32 -def : InstRW<[CortexA510MCWrite<12, 9, CortexA510UnitVMC>], (instrs FSQRT_ZPmZ_S)>; +def : InstRW<[CortexA510MCWrite<12, 9, CortexA510UnitVMC>], (instregex "^FSQRT_ZPmZ_S")>; // Floating point square root, F64 -def : InstRW<[CortexA510MCWrite<22, 19, CortexA510UnitVMC>], (instrs FSQRT_ZPmZ_D)>; +def : InstRW<[CortexA510MCWrite<22, 19, CortexA510UnitVMC>], (instregex "^FSQRT_ZPmZ_D")>; // Floating point trigonometric exponentiation -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FEXPA_ZZ_[HSD]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FEXPA_ZZ_[HSD]")>; // Floating point trigonometric multiply add -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FTMAD_ZZI_[HSD]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FTMAD_ZZI_[HSD]")>; // Floating point trigonometric, miscellaneous -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FTSMUL_ZZZ_[HSD]$")>; -def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FTSSEL_ZZZ_[HSD]$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FTSMUL_ZZZ_[HSD]")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FTSSEL_ZZZ_[HSD]")>; // SVE BFloat16 (BF16) instructions @@ -1151,7 +1156,7 @@ def : InstRW<[A510Write_15cyc_1VMAC_1VALU], (instrs BFMMLA_ZZZ)>; // Multiply accumulate long -def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>; +def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^BFMLAL[BT]_ZZZ(I)?")>; // SVE Load instructions // 
----------------------------------------------------------------------------- diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td @@ -1998,174 +1998,172 @@ // Loop control, based on GPR def : InstRW<[V2Write_3cyc_2M], - (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>; -def : InstRW<[V2Write_3cyc_2M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]$")>; + (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]")>; +def : InstRW<[V2Write_3cyc_2M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]")>; // Loop terminate -def : InstRW<[V2Write_1cyc_2M], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>; +def : InstRW<[V2Write_1cyc_2M], (instregex "^CTERM(EQ|NE)_(WW|XX)")>; // Predicate counting scalar def : InstRW<[V2Write_2cyc_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>; def : InstRW<[V2Write_2cyc_1M], - (instregex "^(CNT|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI$", - "^SQ(DEC|INC)[BHWD]_XPiWdI$", - "^UQ(DEC|INC)[BHWD]_WPiI$")>; + (instregex "^(CNT|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI", + "^SQ(DEC|INC)[BHWD]_XPiWdI", + "^UQ(DEC|INC)[BHWD]_WPiI")>; // Predicate counting scalar, ALL, {1,2,4} -def : InstRW<[V2Write_IncDec], (instregex "^(DEC|INC)[BHWD]_XPiI$")>; +def : InstRW<[V2Write_IncDec], (instregex "^(DEC|INC)[BHWD]_XPiI")>; // Predicate counting scalar, active predicate def : InstRW<[V2Write_2cyc_1M], - (instregex "^CNTP_XPP_[BHSD]$", - "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]$", - "^(UQDEC|UQINC)P_WP_[BHSD]$", - "^(SQDEC|SQINC)P_XPWd_[BHSD]$")>; + (instregex "^CNTP_XPP_[BHSD]", + "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]", + "^(UQDEC|UQINC)P_WP_[BHSD]", + "^(SQDEC|SQINC)P_XPWd_[BHSD]")>; // Predicate counting vector, active predicate def : InstRW<[V2Write_7cyc_1M_1M0_1V], - (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]$")>; + (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]")>; // 
Predicate logical def : InstRW<[V2Write_1or2cyc_1M0], - (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>; + (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP")>; // Predicate logical, flag setting def : InstRW<[V2Write_1or2cyc_1M0_1M], - (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP$")>; + (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP")>; // Predicate reverse -def : InstRW<[V2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]")>; // Predicate select def : InstRW<[V2Write_1cyc_1M0], (instrs SEL_PPPP)>; // Predicate set -def : InstRW<[V2Write_2cyc_1M], (instregex "^PFALSE$", "^PTRUE_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1M], (instregex "^PFALSE", "^PTRUE_[BHSD]")>; // Predicate set/initialize, set flags -def : InstRW<[V2Write_3cyc_2M], (instregex "^PTRUES_[BHSD]$")>; +def : InstRW<[V2Write_3cyc_2M], (instregex "^PTRUES_[BHSD]")>; // Predicate find first/next -def : InstRW<[V2Write_2cyc_1M], (instregex "^PFIRST_B$", "^PNEXT_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1M], (instregex "^PFIRST_B", "^PNEXT_[BHSD]")>; // Predicate test def : InstRW<[V2Write_1cyc_1M], (instrs PTEST_PP)>; // Predicate transpose -def : InstRW<[V2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSD]")>; // Predicate unpack and widen def : InstRW<[V2Write_2cyc_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>; // Predicate zip/unzip -def : InstRW<[V2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSD]")>; // SVE integer instructions // ----------------------------------------------------------------------------- // Arithmetic, absolute diff -def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]$", - "^[SU]ABD_ZPZZ_[BHSD]_UNDEF$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]", + "^[SU]ABD_ZPZZ_[BHSD]")>; // Arithmetic, absolute diff accum -def : 
InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABA_ZZZ_[BHSD]$")>; +def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABA_ZZZ_[BHSD]")>; // Arithmetic, absolute diff accum long -def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]$")>; +def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]")>; // Arithmetic, absolute diff long -def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]")>; // Arithmetic, basic def : InstRW<[V2Write_2cyc_1V], - (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]$", - "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]_UNDEF$", - "^(ADD|SUB)_ZZZ_[BHSD]$", - "^(ADD|SUB|SUBR)_ZI_[BHSD]$", - "^ADR_[SU]XTW_ZZZ_D_[0123]$", - "^ADR_LSL_ZZZ_[SD]_[0123]$", - "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]$", - "^SADDLBT_ZZZ_[HSD]$", - "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]$", - "^SSUBL(BT|TB)_ZZZ_[HSD]$")>; + (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]", + "^(ADD|SUB)_ZZZ_[BHSD]", + "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]", + "^(ADD|SUB|SUBR)_ZI_[BHSD]", + "^ADR_[SU]XTW_ZZZ_D_[0123]", + "^ADR_LSL_ZZZ_[SD]_[0123]", + "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]", + "^SADDLBT_ZZZ_[HSD]", + "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]", + "^SSUBL(BT|TB)_ZZZ_[HSD]")>; // Arithmetic, complex def : InstRW<[V2Write_2cyc_1V], - (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]$", - "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]$", - "^SQ(ABS|NEG)_ZPmZ_[BHSD]_UNDEF$", - "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]$", - "^[SU]Q(ADD|SUB)_ZI_[BHSD]$", - "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]$", - "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]$")>; + (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]", + "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]", + "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]", + "^[SU]Q(ADD|SUB)_ZI_[BHSD]", + "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]", + "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>; // Arithmetic, large integer -def : InstRW<[V2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>; // Arithmetic, 
pairwise add -def : InstRW<[V2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]")>; // Arithmetic, pairwise add and accum long def : InstRW<[V2Wr_ZPA, ReadDefault, V2Rd_ZPA], - (instregex "^[SU]ADALP_ZPmZ_[HSD]$")>; + (instregex "^[SU]ADALP_ZPmZ_[HSD]")>; // Arithmetic, shift def : InstRW<[V2Write_2cyc_1V13], - (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]$", - "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]$", - "^(ASR|LSL|LSR)_ZPmI_[BHSD]$", - "^(ASR|LSL|LSR)_ZPmZ_[BHSD]$", - "^(ASR|LSL|LSR)_ZZI_[BHSD]$", - "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]_UNDEF$", - "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]$")>; + (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]", + "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]", + "^(ASR|LSL|LSR)_ZPmI_[BHSD]", + "^(ASR|LSL|LSR)_ZPmZ_[BHSD]", + "^(ASR|LSL|LSR)_ZZI_[BHSD]", + "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]", + "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>; // Arithmetic, shift and accumulate -def : InstRW<[V2Wr_ZSA, V2Rd_ZSA], (instregex "^[SU]R?SRA_ZZI_[BHSD]$")>; +def : InstRW<[V2Wr_ZSA, V2Rd_ZSA], (instregex "^[SU]R?SRA_ZZI_[BHSD]")>; // Arithmetic, shift by immediate -def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHRN[BT]_ZZI_[BHS]$", - "^[SU]SHLL[BT]_ZZI_[HSD]$")>; +def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHRN[BT]_ZZI_[BHS]", + "^[SU]SHLL[BT]_ZZI_[HSD]")>; // Arithmetic, shift by immediate and insert -def : InstRW<[V2Write_2cyc_1V13], (instregex "^(SLI|SRI)_ZZI_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1V13], (instregex "^(SLI|SRI)_ZZI_[BHSD]")>; // Arithmetic, shift complex def : InstRW<[V2Write_4cyc_1V13], - (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]$", - "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]$", - "^[SU]QR?SHL_ZPZZ_[BHSD]_UNDEF$", - "^(SQSHL|SQSHLU|UQSHL)_ZPmI_[BHSD]$", - "^SQSHRU?N[BT]_ZZI_[BHS]$", - "^UQR?SHRN[BT]_ZZI_[BHS]$")>; + (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]", + "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]", + "^[SU]QR?SHL_ZPZZ_[BHSD]", + 
"^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]", + "^SQSHRU?N[BT]_ZZI_[BHS]", + "^UQR?SHRN[BT]_ZZI_[BHS]")>; // Arithmetic, shift right for divide -def : InstRW<[V2Write_4cyc_1V13], (instregex "^ASRD_ZPmI_[BHSD]$")>; +def : InstRW<[V2Write_4cyc_1V13], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>; // Arithmetic, shift rounding -def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]$", - "^[SU]RSHL_ZPZZ_[BHSD]_UNDEF$", - "^[SU]RSHR_ZPmI_[BHSD]$")>; +def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]", + "^[SU]RSHL_ZPZZ_[BHSD]", + "^[SU]RSHR_(ZPmI|ZPZI)_[BHSD]")>; // Bit manipulation -def : InstRW<[V2Write_6cyc_2V1], (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]$")>; +def : InstRW<[V2Write_6cyc_2V1], (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]")>; // Bitwise select -def : InstRW<[V2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ")>; // Count/reverse bits -def : InstRW<[V2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$", - "^(CLS|CLZ|CNT)_ZPmZ_[BHSD]_UNDEF$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>; // Broadcast logical bitmask immediate to vector def : InstRW<[V2Write_2cyc_1V], (instrs DUPM_ZI)>; // Compare and set flags def : InstRW<[V2Write_4or5cyc_1V0_1M0], - (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$", - "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>; + (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]", + "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>; // Complex add -def : InstRW<[V2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]")>; // Complex dot product 8-bit element def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>; @@ -2174,201 +2172,200 @@ def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>; // Complex multiply-add B, H, S 
element size -def : InstRW<[V2Wr_ZCMABHS, V2Rd_ZCMABHS], (instregex "^CMLA_ZZZ_[BHS]$", - "^CMLA_ZZZI_[HS]$")>; +def : InstRW<[V2Wr_ZCMABHS, V2Rd_ZCMABHS], (instregex "^CMLA_ZZZ_[BHS]", + "^CMLA_ZZZI_[HS]")>; // Complex multiply-add D element size def : InstRW<[V2Wr_ZCMAD, V2Rd_ZCMAD], (instrs CMLA_ZZZ_D)>; // Conditional extract operations, scalar form -def : InstRW<[V2Write_8cyc_1M0_1V01], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>; +def : InstRW<[V2Write_8cyc_1M0_1V01], (instregex "^CLAST[AB]_RPZ_[BHSD]")>; // Conditional extract operations, SIMD&FP scalar and vector forms -def : InstRW<[V2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$", - "^COMPACT_ZPZ_[SD]$", - "^SPLICE_ZPZZ?_[BHSD]$")>; +def : InstRW<[V2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]", + "^COMPACT_ZPZ_[SD]", + "^SPLICE_ZPZZ?_[BHSD]")>; // Convert to floating point, 64b to float or convert to double -def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD](_UNDEF)?$", - "^[SU]CVTF_ZPmZ_StoD(_UNDEF)?$")>; +def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]", + "^[SU]CVTF_ZPmZ_StoD")>; // Convert to floating point, 32b to single or half -def : InstRW<[V2Write_4cyc_2V02], (instregex "^[SU]CVTF_ZPmZ_Sto[HS](_UNDEF)?$")>; +def : InstRW<[V2Write_4cyc_2V02], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>; // Convert to floating point, 16b to half -def : InstRW<[V2Write_6cyc_4V02], (instregex "^[SU]CVTF_ZPmZ_HtoH(_UNDEF)?$")>; +def : InstRW<[V2Write_6cyc_4V02], (instregex "^[SU]CVTF_ZPmZ_HtoH")>; // Copy, scalar -def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]$")>; +def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]")>; // Copy, scalar SIMD&FP or imm -def : InstRW<[V2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]$", - "^CPY_ZPzI_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]", + "^CPY_ZPzI_[BHSD]")>; // Divides, 32 bit -def : InstRW<[V2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S$", - 
"^[SU]DIV_ZPZZ_S_UNDEF$")>; +def : InstRW<[V2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S", + "^[SU]DIV_ZPZZ_S")>; // Divides, 64 bit -def : InstRW<[V2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D$", - "^[SU]DIV_ZPZZ_D_UNDEF$")>; +def : InstRW<[V2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D", + "^[SU]DIV_ZPZZ_D")>; // Dot product, 8 bit -def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_S$")>; +def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_S")>; // Dot product, 8 bit, using signed and unsigned integers def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>; // Dot product, 16 bit -def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_D$")>; +def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_D")>; // Duplicate, immediate and indexed form -def : InstRW<[V2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]$", - "^DUP_ZZI_[BHSDQ]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]", + "^DUP_ZZI_[BHSDQ]")>; // Duplicate, scalar form -def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]$")>; +def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]")>; // Extend, sign or zero -def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]XTB_ZPmZ(_UNDEF)?_[HSD]$", - "^[SU]XTH_ZPmZ(_UNDEF)?_[SD]$", - "^[SU]XTW_ZPmZ(_UNDEF)?_[D]$")>; +def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]XTB_ZPmZ_[HSD]", + "^[SU]XTH_ZPmZ_[SD]", + "^[SU]XTW_ZPmZ_[D]")>; // Extract def : InstRW<[V2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>; // Extract narrow saturating -def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]$", - "^SQXTUN[BT]_ZZ_[BHS]$")>; +def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]", + "^SQXTUN[BT]_ZZ_[BHS]")>; // Extract/insert operation, SIMD and FP scalar form -def : InstRW<[V2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]$", - "^INSR_ZV_[BHSD]$")>; +def : InstRW<[V2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]", + 
"^INSR_ZV_[BHSD]")>; // Extract/insert operation, scalar -def : InstRW<[V2Write_6cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]$", - "^INSR_ZR_[BHSD]$")>; +def : InstRW<[V2Write_6cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]", + "^INSR_ZR_[BHSD]")>; // Histogram operations -def : InstRW<[V2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]$", - "^HISTSEG_ZZZ$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]", + "^HISTSEG_ZZZ")>; // Horizontal operations, B, H, S form, immediate operands only -def : InstRW<[V2Write_4cyc_1V02], (instregex "^INDEX_II_[BHS]$")>; +def : InstRW<[V2Write_4cyc_1V02], (instregex "^INDEX_II_[BHS]")>; // Horizontal operations, B, H, S form, scalar, immediate operands/ scalar // operands only / immediate, scalar operands -def : InstRW<[V2Write_7cyc_1M0_1V02], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>; +def : InstRW<[V2Write_7cyc_1M0_1V02], (instregex "^INDEX_(IR|RI|RR)_[BHS]")>; // Horizontal operations, D form, immediate operands only def : InstRW<[V2Write_5cyc_2V02], (instrs INDEX_II_D)>; // Horizontal operations, D form, scalar, immediate operands)/ scalar operands // only / immediate, scalar operands -def : InstRW<[V2Write_8cyc_2M0_2V02], (instregex "^INDEX_(IR|RI|RR)_D$")>; +def : InstRW<[V2Write_8cyc_2M0_2V02], (instregex "^INDEX_(IR|RI|RR)_D")>; // Logical def : InstRW<[V2Write_2cyc_1V], - (instregex "^(AND|EOR|ORR)_ZI$", - "^(AND|BIC|EOR|ORR)_ZZZ$", - "^EOR(BT|TB)_ZZZ_[BHSD]$", - "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$", - "^NOT_ZPmZ_[BHSD]_UNDEF$")>; + (instregex "^(AND|EOR|ORR)_ZI", + "^(AND|BIC|EOR|ORR)_ZZZ", + "^EOR(BT|TB)_ZZZ_[BHSD]", + "^(AND|BIC|EOR|NOT|ORR)_(ZPmZ|ZPZZ)_[BHSD]", + "^NOT_ZPmZ_[BHSD]")>; // Max/min, basic and pairwise -def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]$", - "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]$", - "^[SU](MAX|MIN)_ZPZZ_[BHSD]_UNDEF$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]", + "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]", + "^[SU](MAX|MIN)_ZPZZ_[BHSD]")>; 
// Matching operations // FIXME: SOG p. 44, n. 5: If the consuming instruction has a flag source, the // latency for this instruction is 4 cycles. -def : InstRW<[V2Write_2or3cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]$")>; +def : InstRW<[V2Write_2or3cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]")>; // Matrix multiply-accumulate def : InstRW<[V2Wr_ZMMA, V2Rd_ZMMA], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>; // Move prefix -def : InstRW<[V2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$", - "^MOVPRFX_ZZ$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]", + "^MOVPRFX_ZZ")>; // Multiply, B, H, S element size -def : InstRW<[V2Write_4cyc_1V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]$", - "^MUL_ZPZZ_[BHS]_UNDEF$", - "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$", - "^[SU]MULH_ZPZZ_[BHS]_UNDEF$")>; +def : InstRW<[V2Write_4cyc_1V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]", + "^MUL_ZPZZ_[BHS]", + "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]", + "^[SU]MULH_ZPZZ_[BHS]")>; // Multiply, D element size -def : InstRW<[V2Write_5cyc_2V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D$", - "^MUL_ZPZZ_D_UNDEF$", - "^[SU]MULH_(ZPmZ|ZZZ)_D$", - "^[SU]MULH_ZPZZ_D_UNDEF$")>; +def : InstRW<[V2Write_5cyc_2V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D", + "^MUL_ZPZZ_D", + "^[SU]MULH_(ZPmZ|ZZZ)_D", + "^[SU]MULH_ZPZZ_D")>; // Multiply long -def : InstRW<[V2Write_4cyc_1V02], (instregex "^[SU]MULL[BT]_ZZZI_[SD]$", - "^[SU]MULL[BT]_ZZZ_[HSD]$")>; +def : InstRW<[V2Write_4cyc_1V02], (instregex "^[SU]MULL[BT]_ZZZI_[SD]", + "^[SU]MULL[BT]_ZZZ_[HSD]")>; // Multiply accumulate, B, H, S element size def : InstRW<[V2Wr_ZMABHS, V2Rd_ZMABHS], - (instregex "^ML[AS]_ZZZI_[HS]$", "^ML[AS]_ZPZZZ_[BHS]_UNDEF$")>; + (instregex "^ML[AS]_ZZZI_[HS]", "^ML[AS]_ZPZZZ_[BHS]")>; def : InstRW<[V2Wr_ZMABHS, ReadDefault, V2Rd_ZMABHS], - (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]$")>; + (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>; // Multiply accumulate, D element size def : InstRW<[V2Wr_ZMAD, V2Rd_ZMAD], - (instregex 
"^ML[AS]_ZZZI_D$", "^ML[AS]_ZPZZZ_D_UNDEF$")>; + (instregex "^ML[AS]_ZZZI_D", "^ML[AS]_ZPZZZ_D")>; def : InstRW<[V2Wr_ZMAD, ReadDefault, V2Rd_ZMAD], - (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D$")>; + (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>; // Multiply accumulate long -def : InstRW<[V2Wr_ZMAL, V2Rd_ZMAL], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]$", - "^[SU]ML[AS]L[BT]_ZZZI_[SD]$")>; +def : InstRW<[V2Wr_ZMAL, V2Rd_ZMAL], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]", + "^[SU]ML[AS]L[BT]_ZZZI_[SD]")>; // Multiply accumulate saturating doubling long regular def : InstRW<[V2Wr_ZMASQL, V2Rd_ZMASQ], - (instregex "^SQDML[AS]L(B|T|BT)_ZZZ_[HSD]$", - "^SQDML[AS]L[BT]_ZZZI_[SD]$")>; + (instregex "^SQDML[AS]L(B|T|BT)_ZZZ_[HSD]", + "^SQDML[AS]L[BT]_ZZZI_[SD]")>; // Multiply saturating doubling high, B, H, S element size -def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULH_ZZZ_[BHS]$", - "^SQDMULH_ZZZI_[HS]$")>; +def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULH_ZZZ_[BHS]", + "^SQDMULH_ZZZI_[HS]")>; // Multiply saturating doubling high, D element size def : InstRW<[V2Write_5cyc_2V02], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>; // Multiply saturating doubling long -def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULL[BT]_ZZZ_[HSD]$", - "^SQDMULL[BT]_ZZZI_[SD]$")>; +def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULL[BT]_ZZZ_[HSD]", + "^SQDMULL[BT]_ZZZI_[SD]")>; // Multiply saturating rounding doubling regular/complex accumulate, B, H, S // element size -def : InstRW<[V2Wr_ZMASQBHS, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZ_[BHS]$", - "^SQRDCMLAH_ZZZ_[BHS]$", - "^SQRDML[AS]H_ZZZI_[HS]$", - "^SQRDCMLAH_ZZZI_[HS]$")>; +def : InstRW<[V2Wr_ZMASQBHS, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZ_[BHS]", + "^SQRDCMLAH_ZZZ_[BHS]", + "^SQRDML[AS]H_ZZZI_[HS]", + "^SQRDCMLAH_ZZZI_[HS]")>; // Multiply saturating rounding doubling regular/complex accumulate, D element // size -def : InstRW<[V2Wr_ZMASQD, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZI?_D$", - "^SQRDCMLAH_ZZZ_D$")>; +def : 
InstRW<[V2Wr_ZMASQD, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZI?_D", + "^SQRDCMLAH_ZZZ_D")>; // Multiply saturating rounding doubling regular/complex, B, H, S element size -def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQRDMULH_ZZZ_[BHS]$", - "^SQRDMULH_ZZZI_[HS]$")>; +def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQRDMULH_ZZZ_[BHS]", + "^SQRDMULH_ZZZI_[HS]")>; // Multiply saturating rounding doubling regular/complex, D element size -def : InstRW<[V2Write_5cyc_2V02], (instregex "^SQRDMULH_ZZZI?_D$")>; +def : InstRW<[V2Write_5cyc_2V02], (instregex "^SQRDMULH_ZZZI?_D")>; // Multiply/multiply long, (8x8) polynomial -def : InstRW<[V2Write_2cyc_1V23], (instregex "^PMUL_ZZZ_B$", - "^PMULL[BT]_ZZZ_[HDQ]$")>; +def : InstRW<[V2Write_2cyc_1V23], (instregex "^PMUL_ZZZ_B", + "^PMULL[BT]_ZZZ_[HDQ]")>; // Predicate counting vector -def : InstRW<[V2Write_2cyc_1V], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI")>; // Reciprocal estimate -def : InstRW<[V2Write_4cyc_2V02], (instrs URECPE_ZPmZ_S, URSQRTE_ZPmZ_S, - URECPE_ZPmZ_S_UNDEF, URSQRTE_ZPmZ_S_UNDEF)>; +def : InstRW<[V2Write_4cyc_2V02], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>; // Reduction, arithmetic, B form def : InstRW<[V2Write_9cyc_2V_4V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>; @@ -2383,47 +2380,47 @@ def : InstRW<[V2Write_4cyc_2V], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>; // Reduction, logical -def : InstRW<[V2Write_6cyc_1V_1V13], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]$")>; +def : InstRW<[V2Write_6cyc_1V_1V13], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]")>; // Reverse, vector -def : InstRW<[V2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]$", - "^REVB_ZPmZ_[HSD]$", - "^REVH_ZPmZ_[SD]$", - "^REVW_ZPmZ_D$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]", + "^REVB_ZPmZ_[HSD]", + "^REVH_ZPmZ_[SD]", + "^REVW_ZPmZ_D")>; // Select, vector form -def : InstRW<[V2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]$")>; +def : 
InstRW<[V2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]")>; // Table lookup -def : InstRW<[V2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]")>; // Table lookup extension -def : InstRW<[V2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]")>; // Transpose, vector form -def : InstRW<[V2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]")>; // Unpack and extend -def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]")>; // Zip/unzip -def : InstRW<[V2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]")>; // SVE floating-point instructions // ----------------------------------------------------------------------------- // Floating point absolute value/difference -def : InstRW<[V2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]$", - "^FABD_ZPZZ_[HSD]_UNDEF$", - "^FABS_ZPmZ_[HSD]_UNDEF$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]", + "^FABD_ZPZZ_[HSD]", + "^FABS_ZPmZ_[HSD]")>; // Floating point arithmetic -def : InstRW<[V2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$", - "^F(ADD|SUB)_ZPZ[IZ]_[HSD]_UNDEF$", - "^FADDP_ZPmZZ_[HSD]$", - "^FNEG_ZPmZ(_UNDEF)?_[HSD]$", - "^FSUBR_ZPm[IZ]_[HSD]$", - "^FSUBR_ZPZI_[HSD]_UNDEF$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]", + "^F(ADD|SUB)_ZPZ[IZ]_[HSD]", + "^FADDP_ZPmZZ_[HSD]", + "^FNEG_ZPmZ_[HSD]", + "^FSUBR_ZPm[IZ]_[HSD]", + "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>; // Floating point associative add, F16 def : InstRW<[V2Write_10cyc_1V1_9rc], (instrs FADDA_VPZ_H)>; @@ -2435,144 +2432,138 @@ def : InstRW<[V2Write_4cyc_1V], (instrs FADDA_VPZ_D)>; // Floating point compare -def : InstRW<[V2Write_2cyc_1V0], 
(instregex "^FACG[ET]_PPzZZ_[HSD]$", - "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]$", - "^FCM(LE|LT)_PPzZ0_[HSD]$", - "^FCMUO_PPzZZ_[HSD]$")>; +def : InstRW<[V2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]", + "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]", + "^FCM(LE|LT)_PPzZ0_[HSD]", + "^FCMUO_PPzZZ_[HSD]")>; // Floating point complex add -def : InstRW<[V2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]$")>; +def : InstRW<[V2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]")>; // Floating point complex multiply add -def : InstRW<[V2Wr_ZFCMA, ReadDefault, V2Rd_ZFCMA], (instregex "^FCMLA_ZPmZZ_[HSD]$")>; -def : InstRW<[V2Wr_ZFCMA, V2Rd_ZFCMA], (instregex "^FCMLA_ZZZI_[HS]$")>; +def : InstRW<[V2Wr_ZFCMA, ReadDefault, V2Rd_ZFCMA], (instregex "^FCMLA_ZPmZZ_[HSD]")>; +def : InstRW<[V2Wr_ZFCMA, V2Rd_ZFCMA], (instregex "^FCMLA_ZZZI_[HS]")>; // Floating point convert, long or narrow (F16 to F32 or F32 to F16) -def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT_ZPmZ_(HtoS|StoH)(_UNDEF)?$", - "^FCVTLT_ZPmZ_HtoS$", - "^FCVTNT_ZPmZ_StoH$")>; +def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT_ZPmZ_(HtoS|StoH)", + "^FCVTLT_ZPmZ_HtoS", + "^FCVTNT_ZPmZ_StoH")>; // Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 // or F64 to F16) -def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)(_UNDEF)?$", - "^FCVTLT_ZPmZ_StoD$", - "^FCVTNT_ZPmZ_DtoS$")>; +def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)", + "^FCVTLT_ZPmZ_StoD", + "^FCVTNT_ZPmZ_DtoS")>; // Floating point convert, round to odd def : InstRW<[V2Write_3cyc_1V02], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>; // Floating point base2 log, F16 -def : InstRW<[V2Write_6cyc_4V02], (instrs FLOGB_ZPmZ_H)>; +def : InstRW<[V2Write_6cyc_4V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>; // Floating point base2 log, F32 -def : InstRW<[V2Write_4cyc_2V02], (instrs FLOGB_ZPmZ_S)>; +def : InstRW<[V2Write_4cyc_2V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>; // Floating point base2 log, F64 -def 
: InstRW<[V2Write_3cyc_1V02], (instrs FLOGB_ZPmZ_D)>; +def : InstRW<[V2Write_3cyc_1V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>; // Floating point convert to integer, F16 -def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVTZ[SU]_ZPmZ_HtoH(_UNDEF)?$")>; +def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>; // Floating point convert to integer, F32 -def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)(_UNDEF)?$")>; +def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>; // Floating point convert to integer, F64 def : InstRW<[V2Write_3cyc_1V02], - (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)(_UNDEF)?$")>; + (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>; // Floating point copy -def : InstRW<[V2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]$", - "^FDUP_ZI_[HSD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]", + "^FDUP_ZI_[HSD]")>; // Floating point divide, F16 -def : InstRW<[V2Write_13cyc_1V02_12rc], (instregex "^FDIVR?_ZPmZ_H$", - "^FDIV_ZPZZ_H_UNDEF$")>; +def : InstRW<[V2Write_13cyc_1V02_12rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>; // Floating point divide, F32 -def : InstRW<[V2Write_10cyc_1V02_9rc], (instregex "^FDIVR?_ZPmZ_S$", - "^FDIV_ZPZZ_S_UNDEF$")>; +def : InstRW<[V2Write_10cyc_1V02_9rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>; // Floating point divide, F64 -def : InstRW<[V2Write_15cyc_1V02_14rc], (instregex "^FDIVR?_ZPmZ_D$", - "^FDIV_ZPZZ_D_UNDEF$")>; +def : InstRW<[V2Write_15cyc_1V02_14rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>; // Floating point min/max pairwise -def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>; // Floating point min/max -def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$", - "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]_UNDEF$")>; +def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]", + 
"^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>;
 
 // Floating point multiply
-def : InstRW<[V2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]$",
-                                           "^FMULX_ZPZZ_[HSD]_UNDEF$",
-                                           "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$",
-                                           "^FMUL_ZPZ[IZ]_[HSD]_UNDEF$")>;
+def : InstRW<[V2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
+                                           "^FMULX_ZPZZ_[HSD]",
+                                           "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]",
+                                           "^FMUL_ZPZ[IZ]_[HSD]")>;
 
 // Floating point multiply accumulate
 def : InstRW<[V2Wr_ZFMA, ReadDefault, V2Rd_ZFMA],
-             (instregex "^FN?ML[AS]_ZPmZZ_[HSD]$",
-                        "^FN?(MAD|MSB)_ZPmZZ_[HSD]$")>;
+             (instregex "^FN?ML[AS]_ZPmZZ_[HSD]",
+                        "^FN?(MAD|MSB)_ZPmZZ_[HSD]")>;
 
 def : InstRW<[V2Wr_ZFMA, V2Rd_ZFMA],
-             (instregex "^FML[AS]_ZZZI_[HSD]$",
-                        "^FN?ML[AS]_ZPZZZ_[HSD]_UNDEF$")>;
+             (instregex "^FML[AS]_ZZZI_[HSD]",
+                        "^FN?ML[AS]_ZPZZZ_[HSD]")>;
 
 // Floating point multiply add/sub accumulate long
-def : InstRW<[V2Wr_ZFMAL, V2Rd_ZFMAL], (instregex "^FML[AS]L[BT]_ZZZI?_SHH$")>;
+def : InstRW<[V2Wr_ZFMAL, V2Rd_ZFMAL], (instregex "^FML[AS]L[BT]_ZZZI?_SHH")>;
 
 // Floating point reciprocal estimate, F16
-def : InstRW<[V2Write_6cyc_4V02], (instrs FRECPE_ZZ_H, FRECPX_ZPmZ_H,
-                                          FRSQRTE_ZZ_H, FRECPX_ZPmZ_H_UNDEF)>;
+def : InstRW<[V2Write_6cyc_4V02], (instregex "^FR(ECP|SQRT)E_ZZ_H", "^FRECPX_ZPmZ_H")>;
 
 // Floating point reciprocal estimate, F32
-def : InstRW<[V2Write_4cyc_2V02], (instrs FRECPE_ZZ_S, FRECPX_ZPmZ_S,
-                                          FRSQRTE_ZZ_S, FRECPX_ZPmZ_S_UNDEF)>;
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^FR(ECP|SQRT)E_ZZ_S", "^FRECPX_ZPmZ_S")>;
 
 // Floating point reciprocal estimate, F64
-def : InstRW<[V2Write_3cyc_1V02], (instrs FRECPE_ZZ_D, FRECPX_ZPmZ_D,
-                                          FRSQRTE_ZZ_D, FRECPX_ZPmZ_D_UNDEF)>;
+def : InstRW<[V2Write_3cyc_1V02], (instregex "^FR(ECP|SQRT)E_ZZ_D", "^FRECPX_ZPmZ_D")>;
 
 // Floating point reciprocal step
-def : InstRW<[V2Write_4cyc_1V], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>;
+def : InstRW<[V2Write_4cyc_1V], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>;
 
 // Floating point reduction, F16
 def : InstRW<[V2Write_8cyc_4V],
-             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H$")>;
+             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H")>;
 
 // Floating point reduction, F32
 def : InstRW<[V2Write_6cyc_3V],
-             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S$")>;
+             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S")>;
 
 // Floating point reduction, F64
 def : InstRW<[V2Write_4cyc_2V],
-             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D$")>;
+             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D")>;
 
 // Floating point round to integral, F16
-def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ(_UNDEF)?_H$")>;
+def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;
 
 // Floating point round to integral, F32
-def : InstRW<[V2Write_4cyc_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ(_UNDEF)?_S$")>;
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
 
 // Floating point round to integral, F64
-def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ(_UNDEF)?_D$")>;
+def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
 
 // Floating point square root, F16
-def : InstRW<[V2Write_13cyc_1V0_12rc], (instrs FSQRT_ZPmZ_H, FSQRT_ZPmZ_H_UNDEF)>;
+def : InstRW<[V2Write_13cyc_1V0_12rc], (instregex "^FSQRT_ZPmZ_H")>;
 
 // Floating point square root, F32
-def : InstRW<[V2Write_10cyc_1V0_9rc], (instrs FSQRT_ZPmZ_S, FSQRT_ZPmZ_S_UNDEF)>;
+def : InstRW<[V2Write_10cyc_1V0_9rc], (instregex "^FSQRT_ZPmZ_S")>;
 
 // Floating point square root, F64
-def : InstRW<[V2Write_16cyc_1V0_14rc], (instrs FSQRT_ZPmZ_D, FSQRT_ZPmZ_D_UNDEF)>;
+def : InstRW<[V2Write_16cyc_1V0_14rc], (instregex "^FSQRT_ZPmZ_D")>;
 
 // Floating point trigonometric exponentiation
-def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]$")>;
+def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]")>;
 
 // Floating point trigonometric multiply add
-def : InstRW<[V2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]$")>;
+def : InstRW<[V2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]")>;
 
 // Floating point trigonometric, miscellaneous
-def : InstRW<[V2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]$")>;
+def : InstRW<[V2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]")>;
 
 // SVE BFloat16 (BF16) instructions
 // -----------------------------------------------------------------------------
@@ -2587,7 +2578,7 @@
 def : InstRW<[V2Wr_ZBFMMA, V2Rd_ZBFMMA], (instrs BFMMLA_ZZZ)>;
 
 // Multiply accumulate long
-def : InstRW<[V2Wr_ZBFMAL, V2Rd_ZBFMAL], (instregex "^BFMLAL[BT]_ZZZI?$")>;
+def : InstRW<[V2Wr_ZBFMAL, V2Rd_ZBFMAL], (instregex "^BFMLAL[BT]_ZZZI?")>;
 
 // SVE Load instructions
 // -----------------------------------------------------------------------------
diff --git a/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp b/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp
@@ -0,0 +1,115 @@
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+
+#include "gtest/gtest.h"
+
+#include <algorithm>
+#include <memory>
+#include <optional>
+#include <string>
+
+using namespace llvm;
+namespace {
+// Creates an AArch64 target machine for CPU, or returns null when the AArch64
+// target cannot be found in the registry.
+std::unique_ptr<LLVMTargetMachine> createTargetMachine(const std::string &CPU) {
+  auto TT(Triple::normalize("aarch64--"));
+
+  LLVMInitializeAArch64TargetInfo();
+  LLVMInitializeAArch64Target();
+  LLVMInitializeAArch64TargetMC();
+
+  std::string Error;
+  const Target *TheTarget = TargetRegistry::lookupTarget(TT, Error);
+  // Hand back null rather than dereferencing a failed lookup; the caller
+  // asserts on the result.
+  if (!TheTarget)
+    return nullptr;
+
+  return std::unique_ptr<LLVMTargetMachine>(static_cast<LLVMTargetMachine *>(
+      TheTarget->createTargetMachine(TT, CPU, "", TargetOptions(), std::nullopt,
+                                     std::nullopt, CodeGenOpt::Default)));
+}
+
+// Builds an AArch64InstrInfo from a subtarget configured with TM's triple, CPU
+// and feature string.
+// NOTE(review): ST is a local; if AArch64InstrInfo keeps a reference to it the
+// returned object outlives its subtarget - confirm against the constructor.
+std::unique_ptr<AArch64InstrInfo> createInstrInfo(TargetMachine *TM) {
+  AArch64Subtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()),
+                      std::string(TM->getTargetCPU()),
+                      std::string(TM->getTargetFeatureString()), *TM, true);
+  return std::make_unique<AArch64InstrInfo>(ST);
+}
+
+// Returns the largest Cycles value among the write latency entries of SCDesc,
+// or 0 when the class has none.
+int getMaxWriteLatency(const MCSubtargetInfo &STI,
+                       const MCSchedClassDesc &SCDesc) {
+  int MaxLatency = 0;
+  for (unsigned DefIdx = 0, DefEnd = SCDesc.NumWriteLatencyEntries;
+       DefIdx != DefEnd; ++DefIdx) {
+    const MCWriteLatencyEntry *WLEntry =
+        STI.getWriteLatencyEntry(&SCDesc, DefIdx);
+    MaxLatency = std::max(MaxLatency, static_cast<int>(WLEntry->Cycles));
+  }
+  return MaxLatency;
+}
+
+// Checks, for every opcode that getSVEPseudoMap maps to an original
+// instruction, that the pseudo has a valid scheduling class in CPU's model and
+// the same maximum write latency as the instruction it maps to.
+void runSVEPseudoTestForCPU(const std::string &CPU) {
+  std::unique_ptr<LLVMTargetMachine> TM = createTargetMachine(CPU);
+  ASSERT_TRUE(TM);
+  std::unique_ptr<AArch64InstrInfo> II = createInstrInfo(TM.get());
+  ASSERT_TRUE(II);
+
+  const MCSubtargetInfo *STI = TM->getMCSubtargetInfo();
+  const MCSchedModel &SchedModel = STI->getSchedModel();
+
+  for (unsigned i = 0; i < AArch64::INSTRUCTION_LIST_END; ++i) {
+    // Check if instruction is in the pseudo table
+    // i holds the opcode of the pseudo, OrigInstr holds the opcode of the
+    // original instruction
+    int OrigInstr = AArch64::getSVEPseudoMap(i);
+    if (OrigInstr == -1)
+      continue;
+
+    const MCInstrDesc &Desc = II->get(i);
+    unsigned SCClass = Desc.getSchedClass();
+    const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SCClass);
+    // Validate the pseudo's class before its latency entries are read.
+    ASSERT_TRUE(SCDesc->isValid());
+
+    const MCInstrDesc &DescOrig = II->get(OrigInstr);
+    unsigned SCClassOrig = DescOrig.getSchedClass();
+    const MCSchedClassDesc *SCDescOrig =
+        SchedModel.getSchedClassDesc(SCClassOrig);
+
+    // Each class is walked over its own entry count, so a pseudo with more
+    // entries than the original cannot index past the original's entries.
+    int Latency = getMaxWriteLatency(*STI, *SCDesc);
+    int LatencyOrig = getMaxWriteLatency(*STI, *SCDescOrig);
+
+    ASSERT_EQ(Latency, LatencyOrig)
+        << CPU << ": pseudo opcode " << i << ", original opcode " << OrigInstr;
+  }
+}
+
+// TODO : Add more CPUs that support SVE/SVE2
+TEST(AArch64SVESchedPseudoTesta510, IsCorrect) {
+  runSVEPseudoTestForCPU("cortex-a510");
+}
+
+TEST(AArch64SVESchedPseudoTestv2, IsCorrect) {
+  runSVEPseudoTestForCPU("neoverse-v2");
+}
+
+} // namespace
diff --git a/llvm/unittests/Target/AArch64/CMakeLists.txt b/llvm/unittests/Target/AArch64/CMakeLists.txt
--- a/llvm/unittests/Target/AArch64/CMakeLists.txt
+++ b/llvm/unittests/Target/AArch64/CMakeLists.txt
@@ -28,6 +28,7 @@
   InstSizes.cpp
   MatrixRegisterAliasing.cpp
   SMEAttributesTest.cpp
+  AArch64SVESchedPseudoTest.cpp
   )
 
 set_property(TARGET AArch64Tests PROPERTY FOLDER "Tests/UnitTests/TargetTests")