diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td --- a/llvm/lib/Target/PowerPC/P10InstrResources.td +++ b/llvm/lib/Target/PowerPC/P10InstrResources.td @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // Automatically generated file, do not edit! // -// This file defines the itinerary class data for the POWER10 processor. +// This file defines instruction data for SchedModel of the POWER10 processor. // //===----------------------------------------------------------------------===// // 22 Cycles Binary Floating Point operations, 2 input operands @@ -307,41 +307,32 @@ FSELD_rec, FSELS_rec )>; -// 2 Cycles Branch operations, 0 input operands -def : InstRW<[P10W_BR_2C, P10W_DISP_ANY], - (instrs - BCLR, BCLRn, BDNZLR, BDNZLR8, BDNZLRm, BDNZLRp, BDZLR, BDZLR8, BDZLRm, BDZLRp, gBCLR, - BCLRL, BCLRLn, BDNZLRL, BDNZLRLm, BDNZLRLp, BDZLRL, BDZLRLm, BDZLRLp, gBCLRL, - BL, BL8, BL8_NOP, BL8_NOP_RM, BL8_NOP_TLS, BL8_NOTOC, BL8_NOTOC_RM, BL8_NOTOC_TLS, BL8_RM, BL8_TLS, BL8_TLS_, BLR, BLR8, BLRL, BL_NOP, BL_NOP_RM, BL_RM, BL_TLS -)>; - // 2 Cycles Branch operations, 1 input operands def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read], (instrs B, BCC, BCCA, BCCCTR, BCCCTR8, BCCCTRL, BCCCTRL8, BCCL, BCCLA, BCCLR, BCCLRL, CTRL_DEP, TAILB, TAILB8, BA, TAILBA, TAILBA8, - BC, BCTR, BCTR8, BCTRL, BCTRL8, BCTRL8_LDinto_toc, BCTRL8_LDinto_toc_RM, BCTRL8_RM, BCTRL_LWZinto_toc, BCTRL_LWZinto_toc_RM, BCTRL_RM, BCn, BDNZ, BDNZ8, BDNZm, BDNZp, BDZ, BDZ8, BDZm, BDZp, TAILBCTR, TAILBCTR8, gBC, gBCat, - BCL, BCLalways, BCLn, BDNZL, BDNZLm, BDNZLp, BDZL, BDZLm, BDZLp, gBCL, gBCLat, - BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, BLA_RM -)>; - -// 2 Cycles Branch operations, 3 input operands -def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10BR_Read, P10BR_Read], - (instrs BCCTR, BCCTR8, BCCTR8n, BCCTRn, gBCCTR, - BCCTRL, BCCTRL8, BCCTRL8n, BCCTRLn, gBCCTRL + BCCTRL, BCCTRL8, BCCTRL8n, BCCTRLn, gBCCTRL, + BCLR, BCLRn, BDNZLR, BDNZLR8, BDNZLRm, BDNZLRp, BDZLR, BDZLR8, BDZLRm, BDZLRp, gBCLR, + BCLRL, BCLRLn, BDNZLRL, BDNZLRLm, BDNZLRLp, BDZLRL, BDZLRLm, BDZLRLp, gBCLRL, + BL, BL8, BL8_NOP, BL8_NOP_RM, BL8_NOP_TLS, BL8_NOTOC, BL8_NOTOC_RM, BL8_NOTOC_TLS, BL8_RM, BL8_TLS, BL8_TLS_, BLR, BLR8, BLRL, BL_NOP, BL_NOP_RM, BL_RM, BL_TLS, + BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, BLA_RM )>; -// 2 Cycles Branch operations, 4 input operands -def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10BR_Read, P10BR_Read, P10BR_Read], +// 2 Cycles Branch operations, 2 input operands +def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10BR_Read], (instrs + BC, BCTR, BCTR8, BCTRL, BCTRL8, BCTRL8_LDinto_toc, BCTRL8_LDinto_toc_RM, BCTRL8_RM, BCTRL_LWZinto_toc, BCTRL_LWZinto_toc_RM, BCTRL_RM, BCn, BDNZ, BDNZ8, BDNZm, BDNZp, BDZ, BDZ8, BDZm, BDZp, TAILBCTR, TAILBCTR8, gBC, gBCat, BDNZA, BDNZAm, BDNZAp, BDZA, BDZAm, BDZAp, gBCA, gBCAat, + BCL, BCLalways, BCLn, BDNZL, BDNZLm, BDNZLp, BDZL, BDZLm, BDZLp, gBCL, gBCLat, BDNZLA, BDNZLAm, BDNZLAp, BDZLA, BDZLAm, BDZLAp, gBCLA, gBCLAat )>; // 7 Cycles Crypto operations, 1 input operands def : InstRW<[P10W_CY_7C, P10W_DISP_ANY, P10CY_Read], (instrs + VGNB, VSBOX )>; @@ -358,7 +349,6 @@ VCIPHERLAST, VCLZDM, VCTZDM, - VGNB, VNCIPHER, VNCIPHERLAST, VPDEPD, @@ -384,29 +374,24 @@ XSCVSDQP, XSCVSQQP, XSCVUDQP, - XSCVUQQP + XSCVUQQP, + XSRQPI, + XSRQPIX, + XSRQPXP )>; // 13 Cycles Decimal Floating Point operations, 2 input operands def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read, P10DF_Read], (instrs + BCDSR_rec, XSADDQP, XSADDQPO, XSSUBQP, XSSUBQPO )>; -// 13 Cycles Decimal Floating Point operations, 3 input operands -def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read, P10DF_Read, P10DF_Read], - (instrs - BCDSR_rec, - XSRQPI, - XSRQPIX, - XSRQPXP -)>; - // 2-way crack instructions -// 13 Cycles Decimal Floating Point operations, and 3 Cycles Store operations, 2 input operands +// 13 Cycles Decimal Floating Point operations, and 3 Cycles Store operations, 1 input operands def : InstRW<[P10W_DF_13C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY], (instrs HASHST, HASHST8, @@ -439,8 +424,8 @@ XSNMSUBQPO )>; -// 38 Cycles Decimal Floating Point operations, 2 input operands -def : InstRW<[P10W_DF_38C, P10W_DISP_ANY, P10DF_Read, P10DF_Read], +// 38 Cycles Decimal Floating Point operations, 1 input operands +def : InstRW<[P10W_DF_38C, P10W_DISP_ANY, P10DF_Read], (instrs BCDCFSQ_rec )>; @@ -594,20 +579,26 @@ // 5 Cycles Fixed-Point and BCD operations, 1 input operands def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read], (instrs + BCDCFN_rec, + BCDCFZ_rec, BCDCTN_rec, + BCDCTZ_rec, + BCDSETSGN_rec, VMUL10CUQ, VMUL10UQ, - XSXSIGQP + XSTSTDCQP, + XSXSIGQP, + XXGENPCVBM )>; // 5 Cycles Fixed-Point and BCD operations, 2 input operands def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read], (instrs - BCDCFN_rec, - BCDCFZ_rec, + BCDADD_rec, BCDCPSGN_rec, - BCDCTZ_rec, - BCDSETSGN_rec, + BCDS_rec, + BCDSUB_rec, + BCDTRUNC_rec, BCDUS_rec, BCDUTRUNC_rec, VADDCUQ, @@ -623,18 +614,12 @@ XSCMPOQP, XSCMPUQP, XSMAXCQP, - XSMINCQP, - XSTSTDCQP, - XXGENPCVBM + XSMINCQP )>; // 5 Cycles Fixed-Point and BCD operations, 3 input operands def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read, P10DX_Read], (instrs - BCDADD_rec, - BCDS_rec, - BCDSUB_rec, - BCDTRUNC_rec, VADDECUQ, VADDEUQM, VSUBECUQ, @@ -644,7 +629,7 @@ // 4 Cycles ALU2 operations, 0 input operands def : InstRW<[P10W_F2_4C, P10W_DISP_ANY], (instrs - TRAP, TW + MTVSRBMI )>; // 4 Cycles ALU2 operations, 1 input operands @@ -660,9 +645,9 @@ CNTTZD_rec, CNTTZW, CNTTZW8, CNTTZW8_rec, CNTTZW_rec, + EXTSWSLI_32_64_rec, EXTSWSLI_rec, FTSQRT, MTVSRBM, - MTVSRBMI, MTVSRDM, MTVSRHM, MTVSRQM, @@ -670,10 +655,18 @@ POPCNTB, POPCNTB8, POPCNTD, POPCNTW, + RLDIC_rec, + RLDICL_32_rec, RLDICL_rec, + RLDICR_rec, + RLWINM8_rec, RLWINM_rec, VCLZB, VCLZD, VCLZH, VCLZW, + VCNTMBB, + VCNTMBD, + VCNTMBH, + VCNTMBW, VCTZB, VCTZD, VCTZH, @@ -694,27 +687,40 @@ VPOPCNTW, VPRTYBD, VPRTYBW, + VSHASIGMAD, + VSHASIGMAW, XSCVHPDP, XSCVSPDPN, XSTSQRTDP, + XSTSTDCDP, + XSTSTDCSP, XVCVHPSP, XVTLSBB, XVTSQRTDP, - XVTSQRTSP + XVTSQRTSP, + XVTSTDCDP, + XVTSTDCSP )>; // 4 Cycles ALU2 operations, 2 input operands def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read], (instrs CMPEQB, - EXTSWSLI_32_64_rec, EXTSWSLI_rec, + CMPRB, CMPRB8, FCMPOD, FCMPOS, FCMPUD, FCMPUS, FTDIV, + RLDCL_rec, + RLDCR_rec, + RLDIMI_rec, + RLWIMI8_rec, RLWIMI_rec, + RLWNM8_rec, RLWNM_rec, SLD_rec, SLW8_rec, SLW_rec, SRD_rec, SRW8_rec, SRW_rec, + TDI, + TWI, VABSDUB, VABSDUH, VABSDUW, @@ -765,10 +771,6 @@ VCMPNEZW_rec, VCMPSQ, VCMPUQ, - VCNTMBB, - VCNTMBD, - VCNTMBH, - VCNTMBW, VMAXFP, VMINFP, VSUBCUW, @@ -791,8 +793,6 @@ XSMINDP, XSMINJDP, XSTDIVDP, - XSTSTDCDP, - XSTSTDCSP, XVCMPEQDP, XVCMPEQDP_rec, XVCMPEQSP, @@ -810,39 +810,24 @@ XVMINDP, XVMINSP, XVTDIVDP, - XVTDIVSP, - XVTSTDCDP, - XVTSTDCSP + XVTDIVSP )>; // 4 Cycles ALU2 operations, 3 input operands def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read], (instrs - CMPRB, CMPRB8, - RLDCL_rec, - RLDCR_rec, - RLDIC_rec, - RLDICL_32_rec, RLDICL_rec, - RLDICR_rec, TD, - TDI, - TWI, - VSHASIGMAD, - VSHASIGMAW -)>; - -// 4 Cycles ALU2 operations, 4 input operands -def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read], - (instrs - RLDIMI_rec, - RLWINM8_rec, RLWINM_rec, - RLWNM8_rec, RLWNM_rec + TRAP, TW )>; -// 4 Cycles ALU2 operations, 5 input operands -def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read], +// Single crack instructions +// 4 Cycles ALU2 operations, 1 input operands +def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read], (instrs - RLWIMI8_rec, RLWIMI_rec + SRADI_rec, + SRAWI_rec, + TABORTDCI, + TABORTWCI )>; // Single crack instructions @@ -850,19 +835,9 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read], (instrs SRAD_rec, - SRADI_rec, SRAW_rec, - SRAWI_rec -)>; - -// Single crack instructions -// 4 Cycles ALU2 operations, 3 input operands -def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read], - (instrs TABORTDC, - TABORTDCI, - TABORTWC, - TABORTWCI + TABORTWC )>; // 2-way crack instructions @@ -900,32 +875,34 @@ // 3 Cycles ALU operations, 0 input operands def : InstRW<[P10W_FX_3C, P10W_DISP_ANY], (instrs - CR6SET, CREQV, CRSET, DSS, DSSALL, MCRXRX, MFCTR, MFCTR8, MFLR, MFLR8, - NOP, NOP_GT_PWR6, NOP_GT_PWR7, ORI, ORI8, - VXOR, V_SET0, V_SET0B, V_SET0H, - XXLEQV, XXLEQVOnes, - XXLXOR, XXLXORdpz, XXLXORspz, XXLXORz + WAIT )>; // 3 Cycles ALU operations, 1 input operands def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read], (instrs - ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL, LI, LI8, - ADDIS, ADDIS8, ADDISdtprelHA32, ADDIStocHA, ADDIStocHA8, LIS, LIS8, + ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL, LI, LI8, + ADDIC, ADDIC8, + ADDIS, ADDIS8, ADDISdtprelHA32, ADDIStocHA, ADDIStocHA8, LIS, LIS8, ADDME, ADDME8, ADDME8O, ADDMEO, ADDZE, ADDZE8, ADDZE8O, ADDZEO, + ANDI8_rec, ANDI_rec, + ANDIS8_rec, ANDIS_rec, + CMPDI, CMPWI, + CMPLDI, CMPLWI, EXTSB, EXTSB8, EXTSB8_32_64, EXTSB8_rec, EXTSB_rec, EXTSH, EXTSH8, EXTSH8_32_64, EXTSH8_rec, EXTSH_rec, EXTSW, EXTSW_32, EXTSW_32_64, EXTSW_32_64_rec, EXTSW_rec, + EXTSWSLI, EXTSWSLI_32_64, FABSD, FABSS, FMR, FNABSD, FNABSS, @@ -941,11 +918,20 @@ NEG, NEG8, NEG8_rec, NEG_rec, NEG8O, NEGO, + NOP, NOP_GT_PWR6, NOP_GT_PWR7, ORI, ORI8, + ORIS, ORIS8, + RLDIC, + RLDICL, RLDICL_32, RLDICL_32_64, + RLDICR, RLDICR_32, + RLWINM, RLWINM8, SETB, SETB8, SETBC, SETBC8, SETBCR, SETBCR8, SETNBC, SETNBC8, SETNBCR, SETNBCR8, + SRADI, SRADI_32, + SRAWI, + SUBFIC, SUBFIC8, SUBFME, SUBFME8, SUBFME8O, SUBFMEO, SUBFZE, SUBFZE8, @@ -958,7 +944,8 @@ VEXTSW2D, VEXTSW2Ds, VNEGD, VNEGW, - WAIT, + XORI, XORI8, + XORIS, XORIS8, XSABSDP, XSABSQP, XSNABSDP, XSNABSDPs, @@ -987,29 +974,27 @@ ADD4_rec, ADD8_rec, ADDE, ADDE8, ADDE8O, ADDEO, - ADDIC, ADDIC8, + ADDEX, ADDEX8, ADD4O, ADD8O, AND, AND8, AND8_rec, AND_rec, ANDC, ANDC8, ANDC8_rec, ANDC_rec, - ANDI8_rec, ANDI_rec, - ANDIS8_rec, ANDIS_rec, CMPD, CMPW, CMPB, CMPB8, - CMPDI, CMPWI, CMPLD, CMPLW, - CMPLDI, CMPLWI, CRAND, CRANDC, + CR6SET, CREQV, CRSET, CRNAND, CRNOR, CROR, CRORC, CR6UNSET, CRUNSET, CRXOR, + DST, DST64, DSTT, DSTT64, + DSTST, DSTST64, DSTSTT, DSTSTT64, EQV, EQV8, EQV8_rec, EQV_rec, - EXTSWSLI, EXTSWSLI_32_64, FCPSGND, FCPSGNS, NAND, NAND8, NAND8_rec, NAND_rec, @@ -1019,20 +1004,21 @@ OR8_rec, OR_rec, ORC, ORC8, ORC8_rec, ORC_rec, - ORIS, ORIS8, + RLDCL, + RLDCR, + RLDIMI, + RLWIMI, RLWIMI8, + RLWNM, RLWNM8, SLD, SLW, SLW8, SRAD, - SRADI, SRADI_32, SRAW, - SRAWI, SRD, SRW, SRW8, SUBF, SUBF8, SUBF8_rec, SUBF_rec, SUBFE, SUBFE8, SUBFE8O, SUBFEO, - SUBFIC, SUBFIC8, SUBF8O, SUBFO, VADDUBM, VADDUDM, @@ -1103,10 +1089,9 @@ VSUBUDM, VSUBUHM, VSUBUWM, + VXOR, V_SET0, V_SET0B, V_SET0H, XOR, XOR8, XOR8_rec, XOR_rec, - XORI, XORI8, - XORIS, XORIS8, XSCPSGNDP, XSCPSGNQP, XSIEXPDP, @@ -1117,69 +1102,52 @@ XVIEXPSP, XXLAND, XXLANDC, + XXLEQV, XXLEQVOnes, XXLNAND, XXLNOR, XXLOR, XXLORf, - XXLORC + XXLORC, + XXLXOR, XXLXORdpz, XXLXORspz, XXLXORz )>; // 3 Cycles ALU operations, 3 input operands def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read], (instrs - ADDEX, ADDEX8, - DST, DST64, DSTT, DSTT64, - DSTST, DSTST64, DSTSTT, DSTSTT64, ISEL, ISEL8, - RLDCL, - RLDCR, - RLDIC, - RLDICL, RLDICL_32, RLDICL_32_64, - RLDICR, RLDICR_32, VRLDMI, VRLWMI, VSEL, XXSEL )>; -// 3 Cycles ALU operations, 4 input operands -def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read], - (instrs - RLDIMI, - RLWINM, RLWINM8, - RLWNM, RLWNM8 -)>; - -// 3 Cycles ALU operations, 5 input operands -def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read], - (instrs - RLWIMI, RLWIMI8 -)>; - // Single crack instructions // 3 Cycles ALU operations, 0 input operands def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY], (instrs MFFS, MFFS_rec, + MFFSCDRNI, + MFFSCRNI, MFFSL, MFVSCR, - TRECHKPT + MTFSB0, + TBEGIN, + TRECHKPT, + TSR )>; // Single crack instructions // 3 Cycles ALU operations, 1 input operands def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10FX_Read], (instrs + ADDIC_rec, ADDME8_rec, ADDME_rec, ADDME8O_rec, ADDMEO_rec, ADDZE8_rec, ADDZE_rec, ADDZE8O_rec, ADDZEO_rec, MCRFS, MFFSCDRN, - MFFSCDRNI, MFFSCRN, - MFFSCRNI, - MTFSB0, MTVSCR, NEG8O_rec, NEGO_rec, SUBFME8_rec, SUBFME_rec, @@ -1187,9 +1155,7 @@ SUBFZE8_rec, SUBFZE_rec, SUBFZE8O_rec, SUBFZEO_rec, TABORT, - TBEGIN, - TRECLAIM, - TSR + TRECLAIM )>; // Single crack instructions @@ -1198,7 +1164,6 @@ (instrs ADDE8_rec, ADDE_rec, ADDE8O_rec, ADDEO_rec, - ADDIC_rec, ADD4O_rec, ADD8O_rec, SUBFE8_rec, SUBFE_rec, SUBFE8O_rec, SUBFEO_rec, @@ -1218,7 +1183,12 @@ (instrs HRFID, MFFSCE, + MTFSB1, + MTFSFI, MTFSFIb, + MTFSFI_rec, + RFEBB, RFID, + SC, STOP )>; @@ -1230,9 +1200,8 @@ FMR_rec, FNABSD_rec, FNABSS_rec, FNEGD_rec, FNEGS_rec, - MTFSB1, - RFEBB, - SC + MTFSF, MTFSFb, + MTFSF_rec )>; // 2-way crack instructions @@ -1243,27 +1212,11 @@ ADDC8_rec, ADDC_rec, ADDC8O, ADDCO, FCPSGND_rec, FCPSGNS_rec, - MTFSF, MTFSFb, - MTFSFI, MTFSFIb, SUBFC, SUBFC8, SUBFC8_rec, SUBFC_rec, SUBFC8O, SUBFCO )>; -// 2-way crack instructions -// 3 Cycles ALU operations, and 3 Cycles ALU operations, 3 input operands -def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read], - (instrs - MTFSFI_rec -)>; - -// 2-way crack instructions -// 3 Cycles ALU operations, and 3 Cycles ALU operations, 4 input operands -def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read], - (instrs - MTFSF_rec -)>; - // 4-way crack instructions // 3 Cycles ALU operations, 3 Cycles ALU operations, 3 Cycles ALU operations, and 3 Cycles ALU operations, 2 input operands def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read], @@ -1283,42 +1236,61 @@ )>; // 2-way crack instructions -// 3 Cycles ALU operations, and 3 Cycles ALU operations, 2 input operands -def : InstRW<[P10W_FX_3C, P10W_DISP_PAIR, P10W_FX_3C, P10FX_Read, P10FX_Read], +// 3 Cycles ALU operations, and 3 Cycles ALU operations, 1 input operands +def : InstRW<[P10W_FX_3C, P10W_DISP_PAIR, P10W_FX_3C, P10FX_Read], (instrs MTCRF, MTCRF8 )>; +// 6 Cycles Load operations, 0 input operands +def : InstRW<[P10W_LD_6C, P10W_DISP_ANY], + (instrs + LBZ, LBZ8, + LD, LDtoc, LDtocBA, LDtocCPT, LDtocJTI, LDtocL, SPILLTOVSR_LD, + DFLOADf32, DFLOADf64, LFD, + LHA, LHA8, + LHZ, LHZ8, + LWA, LWA_32, + LWZ, LWZ8, LWZtoc, LWZtocL, + LXSD, + LXV +)>; + // 6 Cycles Load operations, 1 input operands def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read], (instrs - LBZ, LBZ8, - LD, LDtoc, LDtocBA, LDtocCPT, LDtocJTI, LDtocL, SPILLTOVSR_LD, + LXVL, + LXVLL +)>; + +// 6 Cycles Load operations, 2 input operands +def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read], + (instrs + DCBT, + DCBTST, + ICBT, + LBZX, LBZX8, LBZXTLS, LBZXTLS_, LBZXTLS_32, LDBRX, - DFLOADf32, DFLOADf64, LFD, - LFDX, XFLOADf32, XFLOADf64, + LDX, LDXTLS, LDXTLS_, SPILLTOVSR_LDX, + LFDX, XFLOADf32, XFLOADf64, LFIWAX, LIWAX, LFIWZX, LIWZX, - LHA, LHA8, LHAX, LHAX8, LHBRX, LHBRX8, - LHZ, LHZ8, + LHZX, LHZX8, LHZXTLS, LHZXTLS_, LHZXTLS_32, LVEBX, LVEHX, LVEWX, LVX, LVXL, - LWA, LWA_32, LWAX, LWAX_32, LWBRX, LWBRX8, - LWZ, LWZ8, LWZtoc, LWZtocL, - LXSD, + LWZX, LWZX8, LWZXTLS, LWZXTLS_, LWZXTLS_32, LXSDX, LXSIBZX, LXSIHZX, LXSIWAX, LXSIWZX, - LXV, LXVB16X, LXVD2X, LXVDSX, @@ -1332,22 +1304,8 @@ LXVX )>; -// 6 Cycles Load operations, 2 input operands -def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read], - (instrs - DCBT, - DCBTST, - ICBT, - LBZX, LBZX8, LBZXTLS, LBZXTLS_, LBZXTLS_32, - LDX, LDXTLS, LDXTLS_, SPILLTOVSR_LDX, - LHZX, LHZX8, LHZXTLS, LHZXTLS_, LHZXTLS_32, - LWZX, LWZX8, LWZXTLS, LWZXTLS_, LWZXTLS_32, - LXVL, - LXVLL -)>; - // 2-way crack instructions -// 6 Cycles Load operations, and 13 Cycles Decimal Floating Point operations, 2 input operands +// 6 Cycles Load operations, and 13 Cycles Decimal Floating Point operations, 1 input operands def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DF_13C, P10W_DISP_ANY], (instrs HASHCHK, HASHCHK8, @@ -1358,6 +1316,7 @@ // 6 Cycles Load operations, 0 input operands def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY], (instrs + DARN, SLBIA )>; @@ -1365,11 +1324,7 @@ // 6 Cycles Load operations, 1 input operands def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY, P10LD_Read], (instrs - DARN, - LBARX, LBARXL, - LDARX, LDARXL, - LHARX, LHARXL, - LWARX, LWARXL, + MTSPR, MTSPR8, MTSR, MTUDSCR, MTVRSAVE, MTVRSAVEv, SLBFEE_rec, SLBIE, SLBMFEE, @@ -1380,48 +1335,57 @@ // 6 Cycles Load operations, 2 input operands def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY, P10LD_Read, P10LD_Read], (instrs + LBARX, LBARXL, LBZCIX, + LDARX, LDARXL, LDCIX, + LHARX, LHARXL, LHZCIX, - LWZCIX, - MTSPR, MTSPR8, MTSR, MTUDSCR, MTVRSAVE, MTVRSAVEv + LWARX, LWARXL, + LWZCIX )>; // Expand instructions -// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 1 input operands -def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10LD_Read], +// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 0 input operands +def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY], (instrs LMW )>; // Expand instructions -// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 2 input operands -def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read], +// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 1 input operands +def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10LD_Read], (instrs LSWI )>; // 2-way crack instructions -// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands +// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 0 input operands def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_SX_3C, P10W_DISP_ANY], (instrs LBZU, LBZU8, - LBZUX, LBZUX8, LDU, - LDUX, LFDU, - LFDUX, LHAU, LHAU8, - LHAUX, LHAUX8, LHZU, LHZU8, + LWZU, LWZU8 +)>; + +// 2-way crack instructions +// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 2 input operands +def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_SX_3C, P10W_DISP_ANY], + (instrs + LBZUX, LBZUX8, + LDUX, + LFDUX, + LHAUX, LHAUX8, LHZUX, LHZUX8, LWAUX, - LWZU, LWZU8, LWZUX, LWZUX8 )>; -// 6 Cycles Load operations, 1 input operands -def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read], +// 6 Cycles Load operations, 0 input operands +def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR], (instrs PLBZ, PLBZ8, PLBZ8pc, PLBZpc, PLD, PLDpc, @@ -1438,20 +1402,32 @@ )>; // 2-way crack instructions -// 6 Cycles Load operations, and 4 Cycles ALU2 operations, 1 input operands +// 6 Cycles Load operations, and 4 Cycles ALU2 operations, 0 input operands def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C], (instrs LFS, + LXSSP +)>; + +// 2-way crack instructions +// 6 Cycles Load operations, and 4 Cycles ALU2 operations, 2 input operands +def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C], + (instrs LFSX, - LXSSP, LXSSPX )>; // 4-way crack instructions -// 6 Cycles Load operations, 4 Cycles ALU2 operations, 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 1 input operands +// 6 Cycles Load operations, 4 Cycles ALU2 operations, 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 0 input operands +def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C, P10W_SX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY], + (instrs + LFSU +)>; + +// 4-way crack instructions +// 6 Cycles Load operations, 4 Cycles ALU2 operations, 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 2 input operands def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C, P10W_SX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY], (instrs - LFSU, LFSUX )>; @@ -1470,10 +1446,16 @@ )>; // 2-way crack instructions -// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands +// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 0 input operands +def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_SX_3C], + (instrs + LXVP +)>; + +// 2-way crack instructions +// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 2 input operands def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_SX_3C], (instrs - LXVP, LXVPX )>; @@ -1529,34 +1511,21 @@ XVI8GER4SPP )>; -// 10 Cycles SIMD Matrix Multiply Engine operations, 4 input operands -def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read], - (instrs - PMXVF32GER, - PMXVF64GER -)>; - -// 10 Cycles SIMD Matrix Multiply Engine operations, 5 input operands -def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read], +// 10 Cycles SIMD Matrix Multiply Engine operations, 2 input operands +def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read], (instrs PMXVBF16GER2, PMXVF16GER2, - PMXVF32GERNN, - PMXVF32GERNP, - PMXVF32GERPN, - PMXVF32GERPP, - PMXVF64GERNN, - PMXVF64GERNP, - PMXVF64GERPN, - PMXVF64GERPP, + PMXVF32GER, + PMXVF64GER, PMXVI16GER2, PMXVI16GER2S, PMXVI4GER8, PMXVI8GER4 )>; -// 10 Cycles SIMD Matrix Multiply Engine operations, 6 input operands -def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read], +// 10 Cycles SIMD Matrix Multiply Engine operations, 3 input operands +def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read], (instrs PMXVBF16GER2NN, PMXVBF16GER2NP, @@ -1566,6 +1535,14 @@ PMXVF16GER2NP, PMXVF16GER2PN, PMXVF16GER2PP, + PMXVF32GERNN, + PMXVF32GERNP, + PMXVF32GERPN, + PMXVF32GERPP, + PMXVF64GERNN, + PMXVF64GERNP, + PMXVF64GERPN, + PMXVF64GERPP, PMXVI16GER2PP, PMXVI16GER2SPP, PMXVI4GER8PP, @@ -1587,6 +1564,12 @@ XXMFACC )>; +// 5 Cycles GPR Multiply operations, 1 input operands +def : InstRW<[P10W_MU_5C, P10W_DISP_ANY, P10MU_Read], + (instrs + MULLI, MULLI8 +)>; + // 5 Cycles GPR Multiply operations, 2 input operands def : InstRW<[P10W_MU_5C, P10W_DISP_ANY, P10MU_Read, P10MU_Read], (instrs @@ -1596,7 +1579,6 @@ MULHWU, MULLD, MULLDO, - MULLI, MULLI8, MULLW, MULLWO, VMULHSD, @@ -1629,7 +1611,11 @@ // 4 Cycles Permute operations, 0 input operands def : InstRW<[P10W_PM_4C, P10W_DISP_ANY], (instrs - VSPLTISW, V_SETALLONES, V_SETALLONESB, V_SETALLONESH + LXVKQ, + VSPLTISB, + VSPLTISH, + VSPLTISW, V_SETALLONES, V_SETALLONESB, V_SETALLONESH, + XXSPLTIB )>; // 4 Cycles Permute operations, 1 input operands @@ -1638,17 +1624,21 @@ BRD, BRH, BRH8, BRW, BRW8, - LVSL, - LVSR, - LXVKQ, MFVSRLD, MTVSRWS, VCLZLSBB, VCTZLSBB, + VEXTRACTD, + VEXTRACTUB, + VEXTRACTUH, + VEXTRACTUW, VGBBD, + VINSERTD, + VINSERTW, VPRTYBQ, - VSPLTISB, - VSPLTISH, + VSPLTB, VSPLTBs, + VSPLTH, VSPLTHs, + VSPLTW, VSTRIBL, VSTRIBR, VSTRIHL, @@ -1666,30 +1656,34 @@ XXBRH, XXBRQ, XXBRW, - XXSPLTIB + XXEXTRACTUW, + XXGENPCVDM, + XXGENPCVHM, + XXGENPCVWM, + XXSPLTW, XXSPLTWs )>; // 4 Cycles Permute operations, 2 input operands def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read, P10PM_Read], (instrs BPERMD, + LVSL, + LVSR, MTVSRDD, VBPERMD, VBPERMQ, VCLRLB, VCLRRB, - VEXTRACTD, - VEXTRACTUB, - VEXTRACTUH, - VEXTRACTUW, VEXTUBLX, VEXTUBRX, VEXTUHLX, VEXTUHRX, VEXTUWLX, VEXTUWRX, - VINSERTD, - VINSERTW, + VINSD, + VINSERTB, + VINSERTH, + VINSW, VMRGHB, VMRGHH, VMRGHW, @@ -1710,23 +1704,19 @@ VPKUWUM, VPKUWUS, VSL, + VSLDBI, + VSLDOI, VSLO, VSLV, - VSPLTB, VSPLTBs, - VSPLTH, VSPLTHs, - VSPLTW, VSR, + VSRDBI, VSRO, VSRV, - XXEXTRACTUW, - XXGENPCVDM, - XXGENPCVHM, - XXGENPCVWM, + XXINSERTW, XXMRGHW, XXMRGLW, XXPERMDI, XXPERMDIs, - XXSLDWI, XXSLDWIs, - XXSPLTW, XXSPLTWs + XXSLDWI, XXSLDWIs )>; // 4 Cycles Permute operations, 3 input operands @@ -1744,16 +1734,12 @@ VINSBRX, VINSBVLX, VINSBVRX, - VINSD, VINSDLX, VINSDRX, - VINSERTB, - VINSERTH, VINSHLX, VINSHRX, VINSHVLX, VINSHVRX, - VINSW, VINSWLX, VINSWRX, VINSWVLX, @@ -1761,10 +1747,6 @@ VPERM, VPERMR, VPERMXOR, - VSLDBI, - VSLDOI, - VSRDBI, - XXINSERTW, XXPERM, XXPERMR )>; @@ -1776,13 +1758,19 @@ VSUMSWS )>; -// 4 Cycles Permute operations, 1 input operands -def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read], +// 4 Cycles Permute operations, 0 input operands +def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR], (instrs XXSPLTIDP, XXSPLTIW )>; +// 4 Cycles Permute operations, 1 input operands +def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read], + (instrs + XXSPLTI32DX +)>; + // 4 Cycles Permute operations, 3 input operands def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read], (instrs @@ -1790,12 +1778,6 @@ XXBLENDVD, XXBLENDVH, XXBLENDVW, - XXSPLTI32DX -)>; - -// 4 Cycles Permute operations, 4 input operands -def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read, P10PM_Read], - (instrs XXEVAL, XXPERMX )>; @@ -1803,53 +1785,65 @@ // 3 Cycles Store operations, 1 input operands def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read], (instrs - DCBST, - DCBZ, - ICBI + PSTXVP, PSTXVPpc, + STB, STB8, + STBU, STBU8, + SPILLTOVSR_ST, STD, + STDU, + DFSTOREf32, DFSTOREf64, STFD, + STFDU, + STFS, + STFSU, + STH, STH8, + STHU, STHU8, + STW, STW8, + STWU, STWU8, + STXSD, + STXSSP, + STXV )>; // 3 Cycles Store operations, 2 input operands def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read], (instrs + CP_COPY, CP_COPY8, DCBF, - PSTXVP, PSTXVPpc, - STB, STB8, - STBU, STBU8, + DCBST, + DCBZ, + ICBI, + STXVL, + STXVLL +)>; + +// 3 Cycles Store operations, 3 input operands +def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read], + (instrs STBUX, STBUX8, - SPILLTOVSR_ST, STD, + STBX, STBX8, STBXTLS, STBXTLS_, STBXTLS_32, STDBRX, - STDU, STDUX, - DFSTOREf32, DFSTOREf64, STFD, - STFDU, + SPILLTOVSR_STX, STDX, STDXTLS, STDXTLS_, STFDUX, STFDX, STFIWX, STIWX, - STFS, - STFSU, STFSUX, STFSX, - STH, STH8, STHBRX, - STHU, STHU8, STHUX, STHUX8, + STHX, STHX8, STHXTLS, STHXTLS_, STHXTLS_32, STVEBX, STVEHX, STVEWX, STVX, STVXL, - STW, STW8, STWBRX, - STWU, STWU8, STWUX, STWUX8, - STXSD, + STWX, STWX8, STWXTLS, STWXTLS_, STWXTLS_32, STXSDX, STXSIBX, STXSIBXv, STXSIHX, STXSIHXv, STXSIWX, - STXSSP, STXSSPX, - STXV, STXVB16X, STXVD2X, STXVH8X, @@ -1861,18 +1855,6 @@ STXVX )>; -// 3 Cycles Store operations, 3 input operands -def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read], - (instrs - CP_COPY, CP_COPY8, - STBX, STBX8, STBXTLS, STBXTLS_, STBXTLS_32, - SPILLTOVSR_STX, STDX, STDXTLS, STDXTLS_, - STHX, STHX8, STHXTLS, STHXTLS_, STHXTLS_32, - STWX, STWX8, STWXTLS, STWXTLS_, STWXTLS_32, - STXVL, - STXVLL -)>; - // Single crack instructions // 3 Cycles Store operations, 0 input operands def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY], @@ -1881,25 +1863,16 @@ MSGSYNC, SLBSYNC, TCHECK, + TEND, TLBSYNC )>; -// Single crack instructions -// 3 Cycles Store operations, 1 input operands -def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read], - (instrs - TEND -)>; - // Single crack instructions // 3 Cycles Store operations, 2 input operands def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read], (instrs + CP_PASTE8_rec, CP_PASTE_rec, SLBIEG, - STBCX, - STDCX, - STHCX, - STWCX, TLBIE )>; @@ -1907,29 +1880,26 @@ // 3 Cycles Store operations, 3 input operands def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read], (instrs - CP_PASTE8_rec, CP_PASTE_rec, STBCIX, + STBCX, STDCIX, + STDCX, STHCIX, - STWCIX + STHCX, + STWCIX, + STWCX )>; // 2-way crack instructions // 3 Cycles Store operations, and 3 Cycles ALU operations, 0 input operands def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY], (instrs - ISYNC -)>; - -// 2-way crack instructions -// 3 Cycles Store operations, and 3 Cycles ALU operations, 1 input operands -def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY], - (instrs + ISYNC, SYNC )>; // Expand instructions -// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 6 Cycles Load operations, and 3 Cycles Store operations, 2 input operands +// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 6 Cycles Load operations, and 3 Cycles Store operations, 1 input operands def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY], (instrs LDAT, @@ -1937,7 +1907,7 @@ )>; // 4-way crack instructions -// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, and 3 Cycles Store operations, 3 input operands +// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY], (instrs STDAT, @@ -1945,21 +1915,21 @@ )>; // Expand instructions -// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands -def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read], +// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 1 input operands +def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read], (instrs STMW )>; // Expand instructions -// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 3 input operands -def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read], +// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands +def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read], (instrs STSWI )>; -// 3 Cycles Store operations, 2 input operands -def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10ST_Read, P10ST_Read], +// 3 Cycles Store operations, 1 input operands +def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10ST_Read], (instrs PSTB, PSTB8, PSTB8pc, PSTBpc, PSTD, PSTDpc, @@ -1973,10 +1943,16 @@ )>; // 2-way crack instructions -// 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands -def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10W_ST_3C, P10ST_Read, P10ST_Read], +// 3 Cycles Store operations, and 3 Cycles Store operations, 1 input operands +def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10W_ST_3C, P10ST_Read], + (instrs + STXVP +)>; + +// 2-way crack instructions +// 3 Cycles Store operations, and 3 Cycles Store operations, 3 input operands +def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10W_ST_3C, P10ST_Read, P10ST_Read, P10ST_Read], (instrs - STXVP, STXVPX )>; @@ -2018,27 +1994,21 @@ def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY], (instrs CLRBHRB, - MFMSR + MFBHRBE, + MFMSR, + MFTB )>; // Single crack instructions // 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10SX_Read], (instrs - MFTB -)>; - -// Single crack instructions -// 3 Cycles Simple Fixed-point (SFX) operations, 2 input operands -def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10SX_Read, P10SX_Read], - (instrs - MFBHRBE, MTMSR, MTMSRD )>; // 2-way crack instructions -// 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 1 input operands +// 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 0 input operands def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY], (instrs ADDPCIS diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP10.td b/llvm/lib/Target/PowerPC/PPCScheduleP10.td --- a/llvm/lib/Target/PowerPC/PPCScheduleP10.td +++ b/llvm/lib/Target/PowerPC/PPCScheduleP10.td @@ -25,14 +25,8 @@ def P10Model : SchedMachineModel { let IssueWidth = 8; - - // TODO - Need to be updated according to P10 UM. let MicroOpBufferSize = 44; - - // TODO - tune this on real HW once it arrives. For now, we will use the same - // value as we do on P9. let LoopMicroOpBufferSize = 60; - let CompleteModel = 1; // Do not support SPE (Signal Procesing Engine) on Power 10. diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll --- a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll @@ -592,8 +592,8 @@ ; ; CHECK-32-P10-LABEL: testDouble1: ; CHECK-32-P10: # %bb.0: # %entry -; CHECK-32-P10-NEXT: rlwinm 3, 5, 3, 28, 28 ; CHECK-32-P10-NEXT: addi 4, 1, -16 +; CHECK-32-P10-NEXT: rlwinm 3, 5, 3, 28, 28 ; CHECK-32-P10-NEXT: stxv 34, -16(1) ; CHECK-32-P10-NEXT: stfdx 1, 4, 3 ; CHECK-32-P10-NEXT: lxv 34, -16(1) @@ -650,8 +650,8 @@ ; CHECK-32-P10-LABEL: testDouble2: ; CHECK-32-P10: # %bb.0: # %entry ; CHECK-32-P10-NEXT: lfd 0, 0(3) -; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28 ; CHECK-32-P10-NEXT: addi 6, 1, -32 +; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28 ; CHECK-32-P10-NEXT: stxv 34, -32(1) ; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28 ; CHECK-32-P10-NEXT: stfdx 0, 6, 4 @@ -723,8 +723,8 @@ ; CHECK-32-P10-LABEL: testDouble3: ; CHECK-32-P10: # %bb.0: # %entry ; CHECK-32-P10-NEXT: plfd 0, 65536(3), 0 -; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28 ; CHECK-32-P10-NEXT: addi 6, 1, -32 +; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28 ; CHECK-32-P10-NEXT: stxv 34, -32(1) ; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28 ; CHECK-32-P10-NEXT: stfdx 0, 6, 4 diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll --- a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll +++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll @@ -120,8 +120,8 @@ ; ; CHECK-LINUXBE-LABEL: vec_xst_trunc_ss: ; CHECK-LINUXBE: # %bb.0: # %entry -; CHECK-LINUXBE-NEXT: sldi r3, r5, 1 ; CHECK-LINUXBE-NEXT: vsldoi v2, v2, v2, 10 +; CHECK-LINUXBE-NEXT: sldi r3, r5, 1 ; CHECK-LINUXBE-NEXT: stxsihx v2, r6, r3 ; CHECK-LINUXBE-NEXT: blr ; @@ -137,8 +137,8 @@ ; ; CHECK-AIXBE-LABEL: vec_xst_trunc_ss: ; CHECK-AIXBE: # %bb.0: # %entry -; CHECK-AIXBE-NEXT: sldi r3, r3, 1 ; CHECK-AIXBE-NEXT: vsldoi v2, v2, v2, 10 +; CHECK-AIXBE-NEXT: sldi r3, r3, 1 ; CHECK-AIXBE-NEXT: stxsihx v2, r4, r3 ; CHECK-AIXBE-NEXT: blr entry: @@ -158,8 +158,8 @@ ; ; CHECK-LINUXBE-LABEL: vec_xst_trunc_us: ; CHECK-LINUXBE: # %bb.0: # %entry -; CHECK-LINUXBE-NEXT: sldi r3, r5, 1 ; CHECK-LINUXBE-NEXT: vsldoi v2, v2, v2, 10 +; CHECK-LINUXBE-NEXT: sldi r3, r5, 1 ; CHECK-LINUXBE-NEXT: stxsihx v2, r6, r3 ; CHECK-LINUXBE-NEXT: blr ; @@ -175,8 +175,8 @@ ; ; CHECK-AIXBE-LABEL: vec_xst_trunc_us: ; CHECK-AIXBE: # %bb.0: # %entry -; CHECK-AIXBE-NEXT: sldi r3, r3, 1 ; CHECK-AIXBE-NEXT: vsldoi v2, v2, v2, 10 +; CHECK-AIXBE-NEXT: sldi r3, r3, 1 ; CHECK-AIXBE-NEXT: stxsihx v2, r4, r3 ; CHECK-AIXBE-NEXT: blr entry: @@ -196,8 +196,8 @@ ; ; CHECK-LINUXBE-LABEL: vec_xst_trunc_si: ; CHECK-LINUXBE: # %bb.0: # %entry -; CHECK-LINUXBE-NEXT: sldi r3, r5, 2 ; CHECK-LINUXBE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-LINUXBE-NEXT: sldi r3, r5, 2 ; CHECK-LINUXBE-NEXT: stfiwx f0, r6, r3 ; CHECK-LINUXBE-NEXT: blr ; @@ -213,8 +213,8 @@ ; ; CHECK-AIXBE-LABEL: vec_xst_trunc_si: ; CHECK-AIXBE: # %bb.0: # %entry -; CHECK-AIXBE-NEXT: sldi r3, r3, 2 ; CHECK-AIXBE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-AIXBE-NEXT: sldi r3, r3, 2 ; CHECK-AIXBE-NEXT: stfiwx f0, r4, r3 ; CHECK-AIXBE-NEXT: blr entry: @@ -234,8 +234,8 @@ ; ; CHECK-LINUXBE-LABEL: vec_xst_trunc_ui: ; CHECK-LINUXBE: # %bb.0: # %entry -; CHECK-LINUXBE-NEXT: sldi r3, r5, 2 ; CHECK-LINUXBE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-LINUXBE-NEXT: sldi r3, r5, 2 ; CHECK-LINUXBE-NEXT: stfiwx f0, r6, r3 ; CHECK-LINUXBE-NEXT: blr ; @@ -251,8 +251,8 @@ ; ; CHECK-AIXBE-LABEL: vec_xst_trunc_ui: ; CHECK-AIXBE: # %bb.0: # %entry -; CHECK-AIXBE-NEXT: sldi r3, r3, 2 ; CHECK-AIXBE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-AIXBE-NEXT: sldi r3, r3, 2 ; CHECK-AIXBE-NEXT: stfiwx f0, r4, r3 ; CHECK-AIXBE-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll --- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll +++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll @@ -454,8 +454,8 @@ ; LE-PAIRED-NEXT: add r6, r5, r3 ; LE-PAIRED-NEXT: lxvx v3, r5, r3 ; LE-PAIRED-NEXT: sldi r3, r4, 5 -; LE-PAIRED-NEXT: lxv v2, 16(r6) ; LE-PAIRED-NEXT: add r4, r5, r3 +; LE-PAIRED-NEXT: lxv v2, 16(r6) ; LE-PAIRED-NEXT: stxvx v3, r5, r3 ; LE-PAIRED-NEXT: stxv v2, 16(r4) ; LE-PAIRED-NEXT: blr @@ -468,8 +468,8 @@ ; BE-PAIRED-NEXT: add r6, r5, r3 ; BE-PAIRED-NEXT: lxvx v2, r5, r3 ; BE-PAIRED-NEXT: sldi r3, r4, 5 -; BE-PAIRED-NEXT: lxv v3, 16(r6) ; BE-PAIRED-NEXT: add r4, r5, r3 +; BE-PAIRED-NEXT: lxv v3, 16(r6) ; BE-PAIRED-NEXT: stxvx v2, r5, r3 ; BE-PAIRED-NEXT: stxv v3, 16(r4) ; BE-PAIRED-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll b/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll --- a/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll @@ -20,8 +20,8 @@ define dso_local signext i32 @jumptable(i32 signext %param) { ; CHECK-R-LABEL: jumptable: ; CHECK-R: # %bb.1: # %entry -; CHECK-R-NEXT: paddi r5, 0, .LJTI0_0@PCREL, 1 ; CHECK-R-NEXT: rldic r4, r4 +; CHECK-R-NEXT: paddi r5, 0, .LJTI0_0@PCREL, 1 ; CHECK-R-NEXT: lwax r4, r4, r5 ; CHECK-R-NEXT: add r4, r4, r5 ; CHECK-R-NEXT: mtctr r4 @@ -35,8 +35,8 @@ ; CHECK-A-LE-NEXT: bctr ; CHECK-A-BE-LABEL: jumptable: ; CHECK-A-BE: # %bb.1: # %entry -; CHECK-A-BE-NEXT: paddi r5, 0, .LJTI0_0@PCREL, 1 ; CHECK-A-BE-NEXT: rldic r4, r4 +; CHECK-A-BE-NEXT: paddi r5, 0, .LJTI0_0@PCREL, 1 ; CHECK-A-BE-NEXT: lwax r4, r4, r5 ; CHECK-A-BE-NEXT: mtctr r4 ; CHECK-A-BE-NEXT: bctr diff --git a/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll b/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll --- a/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll +++ b/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll @@ -108,8 +108,8 @@ ; CHECK-P10-CMP-LE-NEXT: lbz r3, 0(r3) ; CHECK-P10-CMP-LE-NEXT: lbz r4, testCompare1@toc@l(r4) ; CHECK-P10-CMP-LE-NEXT: clrlwi r3, r3, 31 -; CHECK-P10-CMP-LE-NEXT: clrlwi r4, r4, 31 ; CHECK-P10-CMP-LE-NEXT: clrldi r3, r3, 32 +; CHECK-P10-CMP-LE-NEXT: clrlwi r4, r4, 31 ; CHECK-P10-CMP-LE-NEXT: clrldi r4, r4, 32 ; CHECK-P10-CMP-LE-NEXT: sub r3, r3, r4 ; CHECK-P10-CMP-LE-NEXT: rldicl r3, r3, 1, 63 diff --git a/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll b/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll --- a/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll +++ b/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll @@ -110,8 +110,8 @@ ; CHECK-P10-CMP-LE-NEXT: lbz r3, 0(r3) ; CHECK-P10-CMP-LE-NEXT: lbz r4, testCompare1@toc@l(r4) ; CHECK-P10-CMP-LE-NEXT: clrlwi r3, r3, 31 -; CHECK-P10-CMP-LE-NEXT: clrlwi r4, r4, 31 ; CHECK-P10-CMP-LE-NEXT: clrldi r3, r3, 32 +; CHECK-P10-CMP-LE-NEXT: clrlwi r4, r4, 31 ; CHECK-P10-CMP-LE-NEXT: clrldi r4, r4, 32 ; CHECK-P10-CMP-LE-NEXT: sub r3, r4, r3 ; CHECK-P10-CMP-LE-NEXT: rldicl r3, r3, 1, 63 diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll --- a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll +++ b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll @@ -1083,16 +1083,16 @@ ; PWR10BE-NEXT: addi r3, r3, .LCPI17_1@toc@l ; PWR10BE-NEXT: lxv v4, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI17_2@toc@ha -; PWR10BE-NEXT: addi r3, r3, .LCPI17_2@toc@l ; PWR10BE-NEXT: vperm v3, v2, v2, v3 +; PWR10BE-NEXT: addi r3, r3, .LCPI17_2@toc@l +; PWR10BE-NEXT: vextsb2w v3, v3 ; PWR10BE-NEXT: lxv v5, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI17_3@toc@ha -; PWR10BE-NEXT: vextsb2w v3, v3 -; PWR10BE-NEXT: addi r3, r3, .LCPI17_3@toc@l ; PWR10BE-NEXT: vperm v4, v2, v2, v4 +; PWR10BE-NEXT: addi r3, r3, .LCPI17_3@toc@l +; PWR10BE-NEXT: vextsb2w v4, v4 ; PWR10BE-NEXT: lxv v0, 0(r3) ; PWR10BE-NEXT: li r3, 0 -; PWR10BE-NEXT: vextsb2w v4, v4 ; PWR10BE-NEXT: vperm v5, v2, v2, v5 ; PWR10BE-NEXT: vadduwm v3, v4, v3 ; PWR10BE-NEXT: vextsb2w v5, v5 @@ -1206,15 +1206,15 @@ ; PWR10BE-NEXT: addi r3, r3, .LCPI18_1@toc@l ; PWR10BE-NEXT: lxv v5, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI18_2@toc@ha -; PWR10BE-NEXT: addi r3, r3, .LCPI18_2@toc@l ; PWR10BE-NEXT: vperm v3, v4, v2, v3 +; PWR10BE-NEXT: addi r3, r3, .LCPI18_2@toc@l ; PWR10BE-NEXT: lxv v0, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI18_3@toc@ha -; PWR10BE-NEXT: addi r3, r3, .LCPI18_3@toc@l ; PWR10BE-NEXT: vperm v5, v4, v2, v5 +; PWR10BE-NEXT: addi r3, r3, .LCPI18_3@toc@l +; PWR10BE-NEXT: vadduwm v3, v5, v3 ; PWR10BE-NEXT: lxv v1, 0(r3) ; PWR10BE-NEXT: li r3, 0 -; PWR10BE-NEXT: vadduwm v3, v5, v3 ; PWR10BE-NEXT: vperm v0, v4, v2, v0 ; PWR10BE-NEXT: vperm v2, v4, v2, v1 ; PWR10BE-NEXT: vadduwm v2, v2, v0 @@ -1572,38 +1572,38 @@ ; PWR10BE-NEXT: addi r3, r3, .LCPI23_1@toc@l ; PWR10BE-NEXT: lxv v4, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI23_2@toc@ha -; PWR10BE-NEXT: addi r3, r3, .LCPI23_2@toc@l ; PWR10BE-NEXT: vperm v3, v2, v2, v3 +; PWR10BE-NEXT: addi r3, r3, .LCPI23_2@toc@l +; PWR10BE-NEXT: vextsb2d v3, v3 ; PWR10BE-NEXT: lxv v5, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI23_3@toc@ha -; PWR10BE-NEXT: vextsb2d v3, v3 -; PWR10BE-NEXT: addi r3, r3, .LCPI23_3@toc@l ; PWR10BE-NEXT: vperm v4, v2, v2, v4 +; PWR10BE-NEXT: addi r3, r3, .LCPI23_3@toc@l +; PWR10BE-NEXT: vextsb2d v4, v4 ; PWR10BE-NEXT: lxv v0, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI23_4@toc@ha -; PWR10BE-NEXT: vextsb2d v4, v4 -; PWR10BE-NEXT: addi r3, r3, .LCPI23_4@toc@l ; PWR10BE-NEXT: vperm v5, v2, v2, v5 +; PWR10BE-NEXT: addi r3, r3, .LCPI23_4@toc@l +; PWR10BE-NEXT: vextsb2d v5, v5 ; PWR10BE-NEXT: lxv v1, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI23_5@toc@ha -; PWR10BE-NEXT: vextsb2d v5, v5 -; PWR10BE-NEXT: addi r3, r3, .LCPI23_5@toc@l ; PWR10BE-NEXT: vperm v0, v2, v2, v0 +; PWR10BE-NEXT: addi r3, r3, .LCPI23_5@toc@l +; PWR10BE-NEXT: vextsb2d v0, v0 ; PWR10BE-NEXT: lxv v6, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI23_6@toc@ha -; PWR10BE-NEXT: vextsb2d v0, v0 -; PWR10BE-NEXT: addi r3, r3, .LCPI23_6@toc@l ; PWR10BE-NEXT: vperm v1, v2, v2, v1 ; PWR10BE-NEXT: vaddudm v5, v0, v5 ; PWR10BE-NEXT: vaddudm v3, v4, v3 ; PWR10BE-NEXT: vaddudm v3, v3, v5 +; PWR10BE-NEXT: addi r3, r3, .LCPI23_6@toc@l +; PWR10BE-NEXT: vextsb2d v1, v1 ; PWR10BE-NEXT: lxv v7, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI23_7@toc@ha -; PWR10BE-NEXT: vextsb2d v1, v1 -; PWR10BE-NEXT: addi r3, r3, .LCPI23_7@toc@l ; PWR10BE-NEXT: vperm v6, v2, v2, v6 -; PWR10BE-NEXT: lxv v8, 0(r3) +; PWR10BE-NEXT: addi r3, r3, .LCPI23_7@toc@l ; PWR10BE-NEXT: vextsb2d v6, v6 +; PWR10BE-NEXT: lxv v8, 0(r3) ; PWR10BE-NEXT: vperm v7, v2, v2, v7 ; PWR10BE-NEXT: vextsb2d v7, v7 ; PWR10BE-NEXT: vperm v2, v2, v2, v8 @@ -1758,28 +1758,28 @@ ; PWR10BE-NEXT: addi r3, r3, .LCPI24_1@toc@l ; PWR10BE-NEXT: lxv v5, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI24_2@toc@ha -; PWR10BE-NEXT: addi r3, r3, .LCPI24_2@toc@l ; PWR10BE-NEXT: vperm v3, v4, v2, v3 +; PWR10BE-NEXT: addi r3, r3, .LCPI24_2@toc@l ; PWR10BE-NEXT: lxv v0, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI24_3@toc@ha -; PWR10BE-NEXT: addi r3, r3, .LCPI24_3@toc@l ; PWR10BE-NEXT: vperm v5, v4, v2, v5 +; PWR10BE-NEXT: addi r3, r3, .LCPI24_3@toc@l ; PWR10BE-NEXT: lxv v1, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI24_4@toc@ha -; PWR10BE-NEXT: addi r3, r3, .LCPI24_4@toc@l ; PWR10BE-NEXT: vperm v0, v4, v2, v0 +; PWR10BE-NEXT: addi r3, r3, .LCPI24_4@toc@l ; PWR10BE-NEXT: lxv v6, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI24_5@toc@ha -; PWR10BE-NEXT: addi r3, r3, .LCPI24_5@toc@l ; PWR10BE-NEXT: vperm v1, v4, v2, v1 +; PWR10BE-NEXT: addi r3, r3, .LCPI24_5@toc@l ; PWR10BE-NEXT: lxv v7, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI24_6@toc@ha -; PWR10BE-NEXT: addi r3, r3, .LCPI24_6@toc@l ; PWR10BE-NEXT: vperm v6, v4, v2, v6 +; PWR10BE-NEXT: addi r3, r3, .LCPI24_6@toc@l ; PWR10BE-NEXT: lxv v8, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI24_7@toc@ha -; PWR10BE-NEXT: addi r3, r3, .LCPI24_7@toc@l ; PWR10BE-NEXT: vperm v7, v4, v2, v7 +; PWR10BE-NEXT: addi r3, r3, .LCPI24_7@toc@l ; PWR10BE-NEXT: lxv v9, 0(r3) ; PWR10BE-NEXT: vperm v8, v4, v2, v8 ; PWR10BE-NEXT: vperm v2, v4, v2, v9