diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -0,0 +1,2075 @@
+//===--- P10InstrResources.td - P10 Scheduling Definitions -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, do not edit!
+//
+// This file maps POWER10 instructions to scheduling resources via InstRW.
+//
+//===----------------------------------------------------------------------===//
+// 22 Cycles Binary Floating Point operations, 2 input operands
+def : InstRW<[P10W_BF_22C, P10W_DISP_ANY, P10BF_Read, P10BF_Read],
+      (instrs
+    FDIVS,
+    XSDIVSP
+)>;
+
+// 2-way crack instructions
+// 22 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_BF_22C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    FDIVS_rec
+)>;
+
+// 24 Cycles Binary Floating Point operations, 2 input operands
+def : InstRW<[P10W_BF_24C, P10W_DISP_ANY, P10BF_Read, P10BF_Read],
+      (instrs
+    XVDIVSP
+)>;
+
+// 26 Cycles Binary Floating Point operations, 1 input operand
+def : InstRW<[P10W_BF_26C, P10W_DISP_ANY, P10BF_Read],
+      (instrs
+    FSQRTS,
+    XSSQRTSP
+)>;
+
+// 2-way crack instructions
+// 26 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_BF_26C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    FSQRTS_rec
+)>;
+
+// 27 Cycles Binary Floating Point operations, 1 input operand
+def : InstRW<[P10W_BF_27C, P10W_DISP_ANY, P10BF_Read],
+      (instrs
+    XVSQRTSP
+)>;
+
+// 27 Cycles Binary Floating Point operations, 2 input operands
+def : InstRW<[P10W_BF_27C, P10W_DISP_ANY, P10BF_Read, P10BF_Read],
+      (instrs
+    FDIV,
+    XSDIVDP,
+    XVDIVDP
+)>;
+
+// 2-way crack instructions
+// 27 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_BF_27C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    FDIV_rec
+)>;
+
+// 36 Cycles Binary Floating Point operations, 1 input operand
+def : InstRW<[P10W_BF_36C, P10W_DISP_ANY, P10BF_Read],
+      (instrs
+    FSQRT,
+    XSSQRTDP,
+    XVSQRTDP
+)>;
+
+// 2-way crack instructions
+// 36 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_BF_36C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    FSQRT_rec
+)>;
+
+// 7 Cycles Binary Floating Point operations, 1 input operand
+def : InstRW<[P10W_BF_7C, P10W_DISP_ANY, P10BF_Read],
+      (instrs
+    FCFID,
+    FCFIDS,
+    FCFIDU,
+    FCFIDUS,
+    FCTID,
+    FCTIDU,
+    FCTIDUZ,
+    FCTIDZ,
+    FCTIW,
+    FCTIWU,
+    FCTIWUZ,
+    FCTIWZ,
+    FRE,
+    FRES,
+    FRIMD, FRIMS,
+    FRIND, FRINS,
+    FRIPD, FRIPS,
+    FRIZD, FRIZS,
+    FRSP,
+    FRSQRTE,
+    FRSQRTES,
+    VCFSX, VCFSX_0,
+    VCFUX, VCFUX_0,
+    VCTSXS, VCTSXS_0,
+    VCTUXS, VCTUXS_0,
+    VLOGEFP,
+    VREFP,
+    VRFIM,
+    VRFIN,
+    VRFIP,
+    VRFIZ,
+    VRSQRTEFP,
+    XSCVDPHP,
+    XSCVDPSP,
+    XSCVDPSPN,
+    XSCVDPSXDS, XSCVDPSXDSs,
+    XSCVDPSXWS, XSCVDPSXWSs,
+    XSCVDPUXDS, XSCVDPUXDSs,
+    XSCVDPUXWS, XSCVDPUXWSs,
+    XSCVSPDP,
+    XSCVSXDDP,
+    XSCVSXDSP,
+    XSCVUXDDP,
+    XSCVUXDSP,
+    XSRDPI,
+    XSRDPIC,
+    XSRDPIM,
+    XSRDPIP,
+    XSRDPIZ,
+    XSREDP,
+    XSRESP,
+    XSRSP,
+    XSRSQRTEDP,
+    XSRSQRTESP,
+    XVCVDPSP,
+    XVCVDPSXDS,
+    XVCVDPSXWS,
+    XVCVDPUXDS,
+    XVCVDPUXWS,
+    XVCVSPBF16,
+    XVCVSPDP,
+    XVCVSPHP,
+    XVCVSPSXDS,
+    XVCVSPSXWS,
+    XVCVSPUXDS,
+    XVCVSPUXWS,
+    XVCVSXDDP,
+    XVCVSXDSP,
+    XVCVSXWDP,
+    XVCVSXWSP,
+    XVCVUXDDP,
+    XVCVUXDSP,
+    XVCVUXWDP,
+    XVCVUXWSP,
+    XVRDPI,
+    XVRDPIC,
+    XVRDPIM,
+    XVRDPIP,
+    XVRDPIZ,
+    XVREDP,
+    XVRESP,
+    XVRSPI,
+    XVRSPIC,
+    XVRSPIM,
+    XVRSPIP,
+    XVRSPIZ,
+    XVRSQRTEDP,
+    XVRSQRTESP
+)>;
+
+// 7 Cycles Binary Floating Point operations, 2 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_ANY, P10BF_Read, P10BF_Read],
+      (instrs
+    FADD,
+    FADDS,
+    FMUL,
+    FMULS,
+    FSUB,
+    FSUBS,
+    VADDFP,
+    VSUBFP,
+    XSADDDP,
+    XSADDSP,
+    XSMULDP,
+    XSMULSP,
+    XSSUBDP,
+    XSSUBSP,
+    XVADDDP,
+    XVADDSP,
+    XVMULDP,
+    XVMULSP,
+    XVSUBDP,
+    XVSUBSP
+)>;
+
+// 7 Cycles Binary Floating Point operations, 3 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_ANY, P10BF_Read, P10BF_Read, P10BF_Read],
+      (instrs
+    FMADD,
+    FMADDS,
+    FMSUB,
+    FMSUBS,
+    FNMADD,
+    FNMADDS,
+    FNMSUB,
+    FNMSUBS,
+    FSELD, FSELS,
+    VMADDFP,
+    VNMSUBFP,
+    XSMADDADP,
+    XSMADDASP,
+    XSMADDMDP,
+    XSMADDMSP,
+    XSMSUBADP,
+    XSMSUBASP,
+    XSMSUBMDP,
+    XSMSUBMSP,
+    XSNMADDADP,
+    XSNMADDASP,
+    XSNMADDMDP,
+    XSNMADDMSP,
+    XSNMSUBADP,
+    XSNMSUBASP,
+    XSNMSUBMDP,
+    XSNMSUBMSP,
+    XVMADDADP,
+    XVMADDASP,
+    XVMADDMDP,
+    XVMADDMSP,
+    XVMSUBADP,
+    XVMSUBASP,
+    XVMSUBMDP,
+    XVMSUBMSP,
+    XVNMADDADP,
+    XVNMADDASP,
+    XVNMADDMDP,
+    XVNMADDMSP,
+    XVNMSUBADP,
+    XVNMSUBASP,
+    XVNMSUBMDP,
+    XVNMSUBMSP
+)>;
+
+// 2-way crack instructions
+// 7 Cycles Binary Floating Point operations, and 7 Cycles Binary Floating Point operations, 1 input operand
+def : InstRW<[P10W_BF_7C, P10W_DISP_EVEN, P10W_BF_7C, P10W_DISP_ANY, P10BF_Read],
+      (instrs
+    VEXPTEFP
+)>;
+
+// 2-way crack instructions
+// 7 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    FADD_rec,
+    FADDS_rec,
+    FMUL_rec,
+    FMULS_rec,
+    FSUB_rec,
+    FSUBS_rec
+)>;
+
+// 2-way crack instructions
+// 7 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_BF_7C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    FCFID_rec,
+    FCFIDS_rec,
+    FCFIDU_rec,
+    FCFIDUS_rec,
+    FCTID_rec,
+    FCTIDU_rec,
+    FCTIDUZ_rec,
+    FCTIDZ_rec,
+    FCTIW_rec,
+    FCTIWU_rec,
+    FCTIWUZ_rec,
+    FCTIWZ_rec,
+    FRE_rec,
+    FRES_rec,
+    FRIMD_rec, FRIMS_rec,
+    FRIND_rec, FRINS_rec,
+    FRIPD_rec, FRIPS_rec,
+    FRIZD_rec, FRIZS_rec,
+    FRSP_rec,
+    FRSQRTE_rec,
+    FRSQRTES_rec
+)>;
+
+// 2-way crack instructions
+// 7 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 3 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    FMADD_rec,
+    FMADDS_rec,
+    FMSUB_rec,
+    FMSUBS_rec,
+    FNMADD_rec,
+    FNMADDS_rec,
+    FNMSUB_rec,
+    FNMSUBS_rec,
+    FSELD_rec, FSELS_rec
+)>;
+
+// 2 Cycles Branch operations, 0 input operands
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY],
+      (instrs
+    BCLR, BCLRn, BDNZLR, BDNZLR8, BDNZLRm, BDNZLRp, BDZLR, BDZLR8, BDZLRm, BDZLRp, gBCLR,
+    BCLRL, BCLRLn, BDNZLRL, BDNZLRLm, BDNZLRLp, BDZLRL, BDZLRLm, BDZLRLp, gBCLRL,
+    BL, BL8, BL8_NOP, BL8_NOP_TLS, BL8_NOTOC, BL8_NOTOC_TLS, BL8_TLS, BL8_TLS_, BLR, BLR8, BLRL, BL_NOP, BL_TLS
+)>;
+
+// 2 Cycles Branch operations, 1 input operand
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read],
+      (instrs
+    B, BCC, BCCA, BCCCTR, BCCCTR8, BCCCTRL, BCCCTRL8, BCCL, BCCLA, BCCLR, BCCLRL, CTRL_DEP, TAILB, TAILB8,
+    BA, TAILBA, TAILBA8,
+    BC, BCTR, BCTR8, BCTRL, BCTRL8, BCTRL8_LDinto_toc, BCTRL_LWZinto_toc, BCn, BDNZ, BDNZ8, BDNZm, BDNZp, BDZ, BDZ8, BDZm, BDZp, TAILBCTR, TAILBCTR8, gBC, gBCat,
+    BCL, BCLalways, BCLn, BDNZL, BDNZLm, BDNZLp, BDZL, BDZLm, BDZLp, gBCL, gBCLat,
+    BLA, BLA8, BLA8_NOP
+)>;
+
+// 2 Cycles Branch operations, 3 input operands
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10BR_Read, P10BR_Read],
+      (instrs
+    BCCTR, BCCTR8, BCCTR8n, BCCTRn, gBCCTR,
+    BCCTRL, BCCTRL8, BCCTRL8n, BCCTRLn, gBCCTRL
+)>;
+
+// 2 Cycles Branch operations, 4 input operands
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10BR_Read, P10BR_Read, P10BR_Read],
+      (instrs
+    BDNZA, BDNZAm, BDNZAp, BDZA, BDZAm, BDZAp, gBCA, gBCAat,
+    BDNZLA, BDNZLAm, BDNZLAp, BDZLA, BDZLAm, BDZLAp, gBCLA, gBCLAat
+)>;
+
+// 7 Cycles Crypto operations, 1 input operand
+def : InstRW<[P10W_CY_7C, P10W_DISP_ANY, P10CY_Read],
+      (instrs
+    VSBOX
+)>;
+
+// 7 Cycles Crypto operations, 2 input operands
+def : InstRW<[P10W_CY_7C, P10W_DISP_ANY, P10CY_Read, P10CY_Read],
+      (instrs
+    CFUGED,
+    CNTLZDM,
+    CNTTZDM,
+    PDEPD,
+    PEXTD,
+    VCFUGED,
+    VCIPHER,
+    VCIPHERLAST,
+    VCLZDM,
+    VCTZDM,
+    VGNB,
+    VNCIPHER,
+    VNCIPHERLAST,
+    VPDEPD,
+    VPEXTD,
+    VPMSUMB,
+    VPMSUMD,
+    VPMSUMH,
+    VPMSUMW
+)>;
+
+// 13 Cycles Decimal Floating Point operations, 1 input operand
+def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read],
+      (instrs
+    XSCVDPQP,
+    XSCVQPDP,
+    XSCVQPDPO,
+    XSCVQPSDZ,
+    XSCVQPSQZ,
+    XSCVQPSWZ,
+    XSCVQPUDZ,
+    XSCVQPUQZ,
+    XSCVQPUWZ,
+    XSCVSDQP,
+    XSCVSQQP,
+    XSCVUDQP,
+    XSCVUQQP
+)>;
+
+// 13 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+      (instrs
+    XSADDQP,
+    XSADDQPO,
+    XSSUBQP,
+    XSSUBQPO
+)>;
+
+// 13 Cycles Decimal Floating Point operations, 3 input operands
+def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read, P10DF_Read, P10DF_Read],
+      (instrs
+    BCDSR_rec,
+    XSRQPI,
+    XSRQPIX,
+    XSRQPXP
+)>;
+
+// 2-way crack instructions
+// 13 Cycles Decimal Floating Point operations, and 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_DF_13C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY],
+      (instrs
+    HASHST,
+    HASHSTP
+)>;
+
+// 24 Cycles Decimal Floating Point operations, 1 input operand
+def : InstRW<[P10W_DF_24C, P10W_DISP_ANY, P10DF_Read],
+      (instrs
+    BCDCTSQ_rec
+)>;
+
+// 25 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_25C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+      (instrs
+    XSMULQP,
+    XSMULQPO
+)>;
+
+// 25 Cycles Decimal Floating Point operations, 3 input operands
+def : InstRW<[P10W_DF_25C, P10W_DISP_ANY, P10DF_Read, P10DF_Read, P10DF_Read],
+      (instrs
+    XSMADDQP,
+    XSMADDQPO,
+    XSMSUBQP,
+    XSMSUBQPO,
+    XSNMADDQP,
+    XSNMADDQPO,
+    XSNMSUBQP,
+    XSNMSUBQPO
+)>;
+
+// 38 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_38C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+      (instrs
+    BCDCFSQ_rec
+)>;
+
+// 59 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_59C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+      (instrs
+    XSDIVQP,
+    XSDIVQPO
+)>;
+
+// 61 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_61C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+      (instrs
+    VDIVESQ,
+    VDIVEUQ,
+    VDIVSQ,
+    VDIVUQ
+)>;
+
+// 68 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_68C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+      (instrs
+    VMODSQ,
+    VMODUQ
+)>;
+
+// 77 Cycles Decimal Floating Point operations, 1 input operand
+def : InstRW<[P10W_DF_77C, P10W_DISP_ANY, P10DF_Read],
+      (instrs
+    XSSQRTQP,
+    XSSQRTQPO
+)>;
+
+// 20 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_20C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+      (instrs
+    DIVW,
+    DIVWO,
+    DIVWU,
+    DIVWUO,
+    MODSW
+)>;
+
+// 2-way crack instructions
+// 20 Cycles Scalar Fixed-Point Divide operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_DV_20C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    DIVW_rec,
+    DIVWO_rec,
+    DIVWU_rec,
+    DIVWUO_rec
+)>;
+
+// 25 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_25C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+      (instrs
+    DIVD,
+    DIVDO,
+    DIVDU,
+    DIVDUO,
+    DIVWE,
+    DIVWEO,
+    DIVWEU,
+    DIVWEUO
+)>;
+
+// 2-way crack instructions
+// 25 Cycles Scalar Fixed-Point Divide operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_DV_25C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    DIVD_rec,
+    DIVDO_rec,
+    DIVDU_rec,
+    DIVDUO_rec,
+    DIVWE_rec,
+    DIVWEO_rec,
+    DIVWEU_rec,
+    DIVWEUO_rec
+)>;
+
+// 27 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_27C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+      (instrs
+    MODSD,
+    MODUD,
+    MODUW
+)>;
+
+// 41 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_41C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+      (instrs
+    DIVDE,
+    DIVDEO,
+    DIVDEU,
+    DIVDEUO
+)>;
+
+// 2-way crack instructions
+// 41 Cycles Scalar Fixed-Point Divide operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_DV_41C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    DIVDE_rec,
+    DIVDEO_rec,
+    DIVDEU_rec,
+    DIVDEUO_rec
+)>;
+
+// 43 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_43C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+      (instrs
+    VDIVSD,
+    VDIVUD
+)>;
+
+// 47 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_47C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+      (instrs
+    VMODSD,
+    VMODUD
+)>;
+
+// 54 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_54C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+      (instrs
+    VDIVSW,
+    VDIVUW
+)>;
+
+// 60 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_60C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+      (instrs
+    VMODSW,
+    VMODUW
+)>;
+
+// 75 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_75C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+      (instrs
+    VDIVESD,
+    VDIVEUD
+)>;
+
+// 83 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_83C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+      (instrs
+    VDIVESW,
+    VDIVEUW
+)>;
+
+// 5 Cycles Fixed-Point and BCD operations, 1 input operand
+def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read],
+      (instrs
+    BCDCTN_rec,
+    VMUL10CUQ,
+    VMUL10UQ,
+    XSXSIGQP
+)>;
+
+// 5 Cycles Fixed-Point and BCD operations, 2 input operands
+def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read],
+      (instrs
+    BCDCFN_rec,
+    BCDCFZ_rec,
+    BCDCPSGN_rec,
+    BCDCTZ_rec,
+    BCDSETSGN_rec,
+    BCDUS_rec,
+    BCDUTRUNC_rec,
+    VADDCUQ,
+    VADDUQM,
+    VMUL10ECUQ,
+    VMUL10EUQ,
+    VSUBCUQ,
+    VSUBUQM,
+    XSCMPEXPQP,
+    XSCMPOQP,
+    XSCMPUQP,
+    XSTSTDCQP,
+    XXGENPCVBM
+)>;
+
+// 5 Cycles Fixed-Point and BCD operations, 3 input operands
+def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read, P10DX_Read],
+      (instrs
+    BCDS_rec,
+    BCDTRUNC_rec,
+    VADDECUQ,
+    VADDEUQM,
+    VSUBECUQ,
+    VSUBEUQM
+)>;
+
+// 4 Cycles ALU2 operations, 0 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY],
+      (instrs
+    TRAP, TW
+)>;
+
+// 4 Cycles ALU2 operations, 1 input operand
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read],
+      (instrs
+    CNTLZD,
+    CNTLZD_rec,
+    CNTLZW, CNTLZW8,
+    CNTLZW8_rec, CNTLZW_rec,
+    CNTTZD,
+    CNTTZD_rec,
+    CNTTZW, CNTTZW8,
+    CNTTZW8_rec, CNTTZW_rec,
+    FTSQRT,
+    MTVSRBM,
+    MTVSRBMI,
+    MTVSRDM,
+    MTVSRHM,
+    MTVSRQM,
+    MTVSRWM,
+    POPCNTB, POPCNTB8,
+    POPCNTD,
+    POPCNTW,
+    VCLZB,
+    VCLZD,
+    VCLZH,
+    VCLZW,
+    VCTZB,
+    VCTZD,
+    VCTZH,
+    VCTZW,
+    VEXPANDBM,
+    VEXPANDDM,
+    VEXPANDHM,
+    VEXPANDQM,
+    VEXPANDWM,
+    VEXTRACTBM,
+    VEXTRACTDM,
+    VEXTRACTHM,
+    VEXTRACTQM,
+    VEXTRACTWM,
+    VPOPCNTB,
+    VPOPCNTD,
+    VPOPCNTH,
+    VPOPCNTW,
+    VPRTYBD,
+    VPRTYBW,
+    XSCVHPDP,
+    XSCVSPDPN,
+    XSTSQRTDP,
+    XVCVHPSP,
+    XVTLSBB,
+    XVTSQRTDP,
+    XVTSQRTSP
+)>;
+
+// 4 Cycles ALU2 operations, 2 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
+      (instrs
+    CMPEQB,
+    EXTSWSLI_32_64_rec, EXTSWSLI_rec,
+    FCMPOD, FCMPOS,
+    FCMPUD, FCMPUS,
+    FTDIV,
+    SLD_rec,
+    SLW8_rec, SLW_rec,
+    SRD_rec,
+    SRW8_rec, SRW_rec,
+    VABSDUB,
+    VABSDUH,
+    VABSDUW,
+    VADDCUW,
+    VADDSBS,
+    VADDSHS,
+    VADDSWS,
+    VADDUBS,
+    VADDUHS,
+    VADDUWS,
+    VAVGSB,
+    VAVGSH,
+    VAVGSW,
+    VAVGUB,
+    VAVGUH,
+    VAVGUW,
+    VCMPBFP,
+    VCMPBFP_rec,
+    VCMPEQFP,
+    VCMPEQFP_rec,
+    VCMPEQUB_rec,
+    VCMPEQUD_rec,
+    VCMPEQUH_rec,
+    VCMPEQUQ,
+    VCMPEQUQ_rec,
+    VCMPEQUW_rec,
+    VCMPGEFP,
+    VCMPGEFP_rec,
+    VCMPGTFP,
+    VCMPGTFP_rec,
+    VCMPGTSB_rec,
+    VCMPGTSD_rec,
+    VCMPGTSH_rec,
+    VCMPGTSQ,
+    VCMPGTSQ_rec,
+    VCMPGTSW_rec,
+    VCMPGTUB_rec,
+    VCMPGTUD_rec,
+    VCMPGTUH_rec,
+    VCMPGTUQ,
+    VCMPGTUQ_rec,
+    VCMPGTUW_rec,
+    VCMPNEB_rec,
+    VCMPNEH_rec,
+    VCMPNEW_rec,
+    VCMPNEZB_rec,
+    VCMPNEZH_rec,
+    VCMPNEZW_rec,
+    VCMPSQ,
+    VCMPUQ,
+    VCNTMBB,
+    VCNTMBD,
+    VCNTMBH,
+    VCNTMBW,
+    VMAXFP,
+    VMINFP,
+    VSUBCUW,
+    VSUBSBS,
+    VSUBSHS,
+    VSUBSWS,
+    VSUBUBS,
+    VSUBUHS,
+    VSUBUWS,
+    XSCMPEQDP,
+    XSCMPEXPDP,
+    XSCMPGEDP,
+    XSCMPGTDP,
+    XSCMPODP,
+    XSCMPUDP,
+    XSMAXCDP,
+    XSMAXDP,
+    XSMAXJDP,
+    XSMINCDP,
+    XSMINDP,
+    XSMINJDP,
+    XSTDIVDP,
+    XSTSTDCDP,
+    XSTSTDCSP,
+    XVCMPEQDP,
+    XVCMPEQDP_rec,
+    XVCMPEQSP,
+    XVCMPEQSP_rec,
+    XVCMPGEDP,
+    XVCMPGEDP_rec,
+    XVCMPGESP,
+    XVCMPGESP_rec,
+    XVCMPGTDP,
+    XVCMPGTDP_rec,
+    XVCMPGTSP,
+    XVCMPGTSP_rec,
+    XVMAXDP,
+    XVMAXSP,
+    XVMINDP,
+    XVMINSP,
+    XVTDIVDP,
+    XVTDIVSP,
+    XVTSTDCDP,
+    XVTSTDCSP
+)>;
+
+// 4 Cycles ALU2 operations, 3 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
+      (instrs
+    CMPRB, CMPRB8,
+    RLDCL_rec,
+    RLDCR_rec,
+    RLDIC_rec,
+    RLDICL_32_rec, RLDICL_rec,
+    RLDICR_rec,
+    TD,
+    TDI,
+    TWI,
+    VSHASIGMAD,
+    VSHASIGMAW
+)>;
+
+// 4 Cycles ALU2 operations, 4 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read],
+      (instrs
+    RLDIMI_rec,
+    RLWINM8_rec, RLWINM_rec,
+    RLWNM8_rec, RLWNM_rec
+)>;
+
+// 4 Cycles ALU2 operations, 5 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read],
+      (instrs
+    RLWIMI8_rec, RLWIMI_rec
+)>;
+
+// Single crack instructions
+// 4 Cycles ALU2 operations, 2 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
+      (instrs
+    SRAD_rec,
+    SRADI_rec,
+    SRAW_rec,
+    SRAWI_rec
+)>;
+
+// Single crack instructions
+// 4 Cycles ALU2 operations, 3 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
+      (instrs
+    TABORTDC,
+    TABORTDCI,
+    TABORTWC,
+    TABORTWCI
+)>;
+
+// 2-way crack instructions
+// 4 Cycles ALU2 operations, and 4 Cycles Permute operations, 2 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_PM_4C, P10W_DISP_ANY],
+      (instrs
+    VRLQ,
+    VRLQNM,
+    VSLQ,
+    VSRAQ,
+    VSRQ
+)>;
+
+// 2-way crack instructions
+// 4 Cycles ALU2 operations, and 4 Cycles Permute operations, 3 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_PM_4C, P10W_DISP_ANY],
+      (instrs
+    VRLQMI
+)>;
+
+// 2-way crack instructions
+// 4 Cycles ALU2 operations, and 4 Cycles ALU2 operations, 0 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_PAIR, P10W_F2_4C],
+      (instrs
+    MFCR, MFCR8
+)>;
+
+// 2 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_FX_2C, P10W_DISP_ANY, P10FX_Read],
+      (instrs
+    MTCTR, MTCTR8, MTCTR8loop, MTCTRloop,
+    MTLR, MTLR8
+)>;
+
+// 3 Cycles ALU operations, 0 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    CR6SET, CREQV, CRSET,
+    DSS, DSSALL,
+    MCRXRX,
+    MFCTR, MFCTR8,
+    MFLR, MFLR8,
+    NOP, NOP_GT_PWR6, NOP_GT_PWR7, ORI, ORI8,
+    VXOR, V_SET0, V_SET0B, V_SET0H,
+    XXLEQV, XXLEQVOnes,
+    XXLXOR, XXLXORdpz, XXLXORspz, XXLXORz
+)>;
+
+// 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
+      (instrs
+    ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL, LI, LI8,
+    ADDIS, ADDIS8, ADDISdtprelHA32, ADDIStocHA, ADDIStocHA8, LIS, LIS8,
+    ADDME, ADDME8,
+    ADDME8O, ADDMEO,
+    ADDZE, ADDZE8,
+    ADDZE8O, ADDZEO,
+    EXTSB, EXTSB8, EXTSB8_32_64,
+    EXTSB8_rec, EXTSB_rec,
+    EXTSH, EXTSH8, EXTSH8_32_64,
+    EXTSH8_rec, EXTSH_rec,
+    EXTSW, EXTSW_32, EXTSW_32_64,
+    EXTSW_32_64_rec, EXTSW_rec,
+    FABSD, FABSS,
+    FMR,
+    FNABSD, FNABSS,
+    FNEGD, FNEGS,
+    MCRF,
+    MFOCRF, MFOCRF8,
+    MFVRD, MFVSRD,
+    MFVRWZ, MFVSRWZ,
+    MTOCRF, MTOCRF8,
+    MTVRD, MTVSRD,
+    MTVRWA, MTVSRWA,
+    MTVRWZ, MTVSRWZ,
+    NEG, NEG8,
+    NEG8_rec, NEG_rec,
+    NEG8O, NEGO,
+    SETB, SETB8,
+    SETBC, SETBC8,
+    SETBCR, SETBCR8,
+    SETNBC, SETNBC8,
+    SETNBCR, SETNBCR8,
+    SUBFME, SUBFME8,
+    SUBFME8O, SUBFMEO,
+    SUBFZE, SUBFZE8,
+    SUBFZE8O, SUBFZEO,
+    VEXTSB2D, VEXTSB2Ds,
+    VEXTSB2W, VEXTSB2Ws,
+    VEXTSD2Q,
+    VEXTSH2D, VEXTSH2Ds,
+    VEXTSH2W, VEXTSH2Ws,
+    VEXTSW2D, VEXTSW2Ds,
+    VNEGD,
+    VNEGW,
+    WAIT,
+    XSABSDP,
+    XSABSQP,
+    XSNABSDP,
+    XSNABSQP,
+    XSNEGDP,
+    XSNEGQP,
+    XSXEXPDP,
+    XSXEXPQP,
+    XSXSIGDP,
+    XVABSDP,
+    XVABSSP,
+    XVNABSDP,
+    XVNABSSP,
+    XVNEGDP,
+    XVNEGSP,
+    XVXEXPDP,
+    XVXEXPSP,
+    XVXSIGDP,
+    XVXSIGSP
+)>;
+
+// 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
+      (instrs
+    ADD4, ADD4TLS, ADD8, ADD8TLS, ADD8TLS_,
+    ADD4_rec, ADD8_rec,
+    ADDE, ADDE8,
+    ADDE8O, ADDEO,
+    ADDIC, ADDIC8,
+    ADD4O, ADD8O,
+    AND, AND8,
+    AND8_rec, AND_rec,
+    ANDC, ANDC8,
+    ANDC8_rec, ANDC_rec,
+    ANDI8_rec, ANDI_rec,
+    ANDIS8_rec, ANDIS_rec,
+    CMPD, CMPW,
+    CMPB, CMPB8,
+    CMPDI, CMPWI,
+    CMPLD, CMPLW,
+    CMPLDI, CMPLWI,
+    CRAND,
+    CRANDC,
+    CRNAND,
+    CRNOR,
+    CROR,
+    CRORC,
+    CR6UNSET, CRUNSET, CRXOR,
+    EQV, EQV8,
+    EQV8_rec, EQV_rec,
+    EXTSWSLI, EXTSWSLI_32_64,
+    FCPSGND, FCPSGNS,
+    NAND, NAND8,
+    NAND8_rec, NAND_rec,
+    NOR, NOR8,
+    NOR8_rec, NOR_rec,
+    COPY, OR, OR8,
+    OR8_rec, OR_rec,
+    ORC, ORC8,
+    ORC8_rec, ORC_rec,
+    ORIS, ORIS8,
+    SLD,
+    SLW, SLW8,
+    SRAD,
+    SRADI, SRADI_32,
+    SRAW,
+    SRAWI,
+    SRD,
+    SRW, SRW8,
+    SUBF, SUBF8,
+    SUBF8_rec, SUBF_rec,
+    SUBFE, SUBFE8,
+    SUBFE8O, SUBFEO,
+    SUBFIC, SUBFIC8,
+    SUBF8O, SUBFO,
+    VADDUBM,
+    VADDUDM,
+    VADDUHM,
+    VADDUWM,
+    VAND,
+    VANDC,
+    VCMPEQUB,
+    VCMPEQUD,
+    VCMPEQUH,
+    VCMPEQUW,
+    VCMPGTSB,
+    VCMPGTSD,
+    VCMPGTSH,
+    VCMPGTSW,
+    VCMPGTUB,
+    VCMPGTUD,
+    VCMPGTUH,
+    VCMPGTUW,
+    VCMPNEB,
+    VCMPNEH,
+    VCMPNEW,
+    VCMPNEZB,
+    VCMPNEZH,
+    VCMPNEZW,
+    VEQV,
+    VMAXSB,
+    VMAXSD,
+    VMAXSH,
+    VMAXSW,
+    VMAXUB,
+    VMAXUD,
+    VMAXUH,
+    VMAXUW,
+    VMINSB,
+    VMINSD,
+    VMINSH,
+    VMINSW,
+    VMINUB,
+    VMINUD,
+    VMINUH,
+    VMINUW,
+    VMRGEW,
+    VMRGOW,
+    VNAND,
+    VNOR,
+    VOR,
+    VORC,
+    VRLB,
+    VRLD,
+    VRLDNM,
+    VRLH,
+    VRLW,
+    VRLWNM,
+    VSLB,
+    VSLD,
+    VSLH,
+    VSLW,
+    VSRAB,
+    VSRAD,
+    VSRAH,
+    VSRAW,
+    VSRB,
+    VSRD,
+    VSRH,
+    VSRW,
+    VSUBUBM,
+    VSUBUDM,
+    VSUBUHM,
+    VSUBUWM,
+    XOR, XOR8,
+    XOR8_rec, XOR_rec,
+    XORI, XORI8,
+    XORIS, XORIS8,
+    XSCPSGNDP,
+    XSCPSGNQP,
+    XSIEXPDP,
+    XSIEXPQP,
+    XVCPSGNDP,
+    XVCPSGNSP,
+    XVIEXPDP,
+    XVIEXPSP,
+    XXLAND,
+    XXLANDC,
+    XXLNAND,
+    XXLNOR,
+    XXLOR, XXLORf,
+    XXLORC
+)>;
+
+// 3 Cycles ALU operations, 3 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read],
+      (instrs
+    ADDEX, ADDEX8,
+    DST, DST64, DSTT, DSTT64,
+    DSTST, DSTST64, DSTSTT, DSTSTT64,
+    ISEL, ISEL8,
+    RLDCL,
+    RLDCR,
+    RLDIC,
+    RLDICL, RLDICL_32, RLDICL_32_64,
+    RLDICR, RLDICR_32,
+    VRLDMI,
+    VRLWMI,
+    VSEL,
+    XXSEL
+)>;
+
+// 3 Cycles ALU operations, 4 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
+      (instrs
+    RLDIMI,
+    RLWINM, RLWINM8,
+    RLWNM, RLWNM8
+)>;
+
+// 3 Cycles ALU operations, 5 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
+      (instrs
+    RLWIMI, RLWIMI8
+)>;
+
+// Single crack instructions
+// 3 Cycles ALU operations, 0 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
+      (instrs
+    MFFS,
+    MFFS_rec,
+    MFFSL,
+    MFVSCR,
+    TRECHKPT
+)>;
+
+// Single crack instructions
+// 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10FX_Read],
+      (instrs
+    ADDME8_rec, ADDME_rec,
+    ADDME8O_rec, ADDMEO_rec,
+    ADDZE8_rec, ADDZE_rec,
+    ADDZE8O_rec, ADDZEO_rec,
+    MCRFS,
+    MFFSCDRN,
+    MFFSCDRNI,
+    MFFSCRN,
+    MFFSCRNI,
+    MTFSB0,
+    MTVSCR,
+    NEG8O_rec, NEGO_rec,
+    SUBFME8_rec, SUBFME_rec,
+    SUBFME8O_rec, SUBFMEO_rec,
+    SUBFZE8_rec, SUBFZE_rec,
+    SUBFZE8O_rec, SUBFZEO_rec,
+    TABORT,
+    TBEGIN,
+    TRECLAIM,
+    TSR
+)>;
+
+// Single crack instructions
+// 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
+      (instrs
+    ADDE8_rec, ADDE_rec,
+    ADDE8O_rec, ADDEO_rec,
+    ADDIC_rec,
+    ADD4O_rec, ADD8O_rec,
+    SUBFE8_rec, SUBFE_rec,
+    SUBFE8O_rec, SUBFEO_rec,
+    SUBF8O_rec, SUBFO_rec
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 0 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    HRFID,
+    MFFSCE,
+    RFID,
+    STOP
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
+      (instrs
+    FABSD_rec, FABSS_rec,
+    FMR_rec,
+    FNABSD_rec, FNABSS_rec,
+    FNEGD_rec, FNEGS_rec,
+    MTFSB1,
+    RFEBB,
+    SC
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
+      (instrs
+    ADDC, ADDC8,
+    ADDC8_rec, ADDC_rec,
+    ADDC8O, ADDCO,
+    FCPSGND_rec, FCPSGNS_rec,
+    MTFSF, MTFSFb,
+    MTFSFI, MTFSFIb,
+    SUBFC, SUBFC8,
+    SUBFC8_rec, SUBFC_rec,
+    SUBFC8O, SUBFCO
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 3 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read],
+      (instrs
+    MTFSFI_rec
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 4 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
+      (instrs
+    MTFSF_rec
+)>;
+
+// 4-way crack instructions
+// 3 Cycles ALU operations, 3 Cycles ALU operations, 3 Cycles ALU operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
+      (instrs
+    ADDC8O_rec, ADDCO_rec,
+    SUBFC8O_rec, SUBFCO_rec
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 4 Cycles Permute operations, 1 input operand
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_PM_4C, P10W_DISP_ANY],
+      (instrs
+    VSTRIBL_rec,
+    VSTRIBR_rec,
+    VSTRIHL_rec,
+    VSTRIHR_rec
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_PAIR, P10W_FX_3C, P10FX_Read, P10FX_Read],
+      (instrs
+    MTCRF, MTCRF8
+)>;
+
+// 6 Cycles Load operations, 1 input operand
+def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
+      (instrs
+    LBZ, LBZ8,
+    LD, LDtoc, LDtocBA, LDtocCPT, LDtocJTI, LDtocL, SPILLTOVSR_LD,
+    LDBRX,
+    DFLOADf32, DFLOADf64, LFD,
+    LFDX, XFLOADf32, XFLOADf64,
+    LFIWAX, LIWAX,
+    LFIWZX, LIWZX,
+    LHA, LHA8,
+    LHAX, LHAX8,
+    LHBRX, LHBRX8,
+    LHZ, LHZ8,
+    LVEBX,
+    LVEHX,
+    LVEWX,
+    LVX,
+    LVXL,
+    LWA, LWA_32,
+    LWAX, LWAX_32,
+    LWBRX, LWBRX8,
+    LWZ, LWZ8, LWZtoc, LWZtocL,
+    LXSD,
+    LXSDX,
+    LXSIBZX,
+    LXSIHZX,
+    LXSIWAX,
+    LXSIWZX,
+    LXV,
+    LXVB16X,
+    LXVD2X,
+    LXVDSX,
+    LXVH8X,
+    LXVRBX,
+    LXVRDX,
+    LXVRHX,
+    LXVRWX,
+    LXVW4X,
+    LXVWSX,
+    LXVX
+)>;
+
+// 6 Cycles Load operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
+      (instrs
+    DCBT,
+    DCBTST,
+    ICBT,
+    LBZX, LBZX8, LBZXTLS, LBZXTLS_, LBZXTLS_32,
+    LDX, LDXTLS, LDXTLS_, SPILLTOVSR_LDX,
+    LHZX, LHZX8, LHZXTLS, LHZXTLS_, LHZXTLS_32,
+    LWZX, LWZX8, LWZXTLS, LWZXTLS_, LWZXTLS_32,
+    LXVL,
+    LXVLL
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 13 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DF_13C, P10W_DISP_ANY],
+      (instrs
+    HASHCHK,
+    HASHCHKP
+)>;
+
+// Single crack instructions
+// 6 Cycles Load operations, 0 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY],
+      (instrs
+    SLBIA
+)>;
+
+// Single crack instructions
+// 6 Cycles Load operations, 1 input operand
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY, P10LD_Read],
+      (instrs
+    DARN,
+    LBARX, LBARXL,
+    LDARX, LDARXL,
+    LHARX, LHARXL,
+    LWARX, LWARXL,
+    SLBFEE_rec,
+    SLBIE,
+    SLBMFEE,
+    SLBMFEV
+)>;
+
+// Single crack instructions
+// 6 Cycles Load operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY, P10LD_Read, P10LD_Read], + (instrs + LBZCIX, + LDCIX, + LHZCIX, + LWZCIX, + MTSPR, MTSPR8, MTSR, MTVRSAVE, MTVRSAVEv +)>; + +// Expand instructions +// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 1 input operands +def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10LD_Read], + (instrs + LMW +)>; + +// Expand instructions +// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 2 input operands +def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read], + (instrs + LSWI +)>; + +// 2-way crack instructions +// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands +def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_SX_3C, P10W_DISP_ANY], + (instrs + LBZU, LBZU8, + LBZUX, LBZUX8, + LDU, + LDUX, + LFDU, + LFDUX, + LHAU, LHAU8, + LHAUX, LHAUX8, + LHZU, LHZU8, + LHZUX, LHZUX8, + LWAUX, + LWZU, LWZU8, + LWZUX, LWZUX8 +)>; + +// 6 Cycles Load operations, 1 input operands +def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read], + (instrs + PLBZ, PLBZ8, PLBZ8pc, PLBZpc, + PLD, PLDpc, + PLFD, PLFDpc, + PLFS, PLFSpc, + PLHA, PLHA8, PLHA8pc, PLHApc, + PLHZ, PLHZ8, PLHZ8pc, PLHZpc, + PLWA, PLWA8, PLWA8pc, PLWApc, + PLWZ, PLWZ8, PLWZ8pc, PLWZpc, + PLXSD, PLXSDpc, + PLXSSP, PLXSSPpc, + PLXV, PLXVpc, + PLXVP, PLXVPpc +)>; + +// 2-way crack instructions +// 6 Cycles Load operations, and 4 Cycles ALU2 operations, 1 input operands +def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C], + (instrs + LFS, + LFSX, + LXSSP, + LXSSPX +)>; + +// 4-way crack instructions +// 6 Cycles Load operations, 4 Cycles ALU2 operations, 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 1 input operands +def : 
InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C, P10W_SX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY], + (instrs + LFSU, + LFSUX +)>; + +// 2-way crack instructions +// 6 Cycles Load operations, and 6 Cycles Load operations, 1 input operands +def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read], + (instrs + TLBIEL +)>; + +// 2-way crack instructions +// 6 Cycles Load operations, and 6 Cycles Load operations, 2 input operands +def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read, P10LD_Read], + (instrs + SLBMTE +)>; + +// 2-way crack instructions +// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands +def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_SX_3C], + (instrs + LXVP, + LXVPX +)>; + +// Single crack instructions +// 13 Cycles Unknown operations, 1 input operands +def : InstRW<[P10W_MFL_13C, P10W_DISP_EVEN, P10W_DISP_ANY], + (instrs + MFSPR, MFSPR8, MFSR, MFTB8, MFVRSAVE, MFVRSAVEv +)>; + +// 10 Cycles SIMD Matrix Multiply Engine operations, 0 input operands +def : InstRW<[P10W_MM_10C, P10W_DISP_ANY], + (instrs + XXSETACCZ +)>; + +// 10 Cycles SIMD Matrix Multiply Engine operations, 2 input operands +def : InstRW<[P10W_MM_10C, P10W_DISP_ANY, P10MM_Read, P10MM_Read], + (instrs + XVBF16GER2, + XVF16GER2, + XVF32GER, + XVF64GER, + XVI16GER2, + XVI16GER2S, + XVI4GER8, + XVI8GER4 +)>; + +// 10 Cycles SIMD Matrix Multiply Engine operations, 3 input operands +def : InstRW<[P10W_MM_10C, P10W_DISP_ANY, P10MM_Read, P10MM_Read, P10MM_Read], + (instrs + XVBF16GER2NN, + XVBF16GER2NP, + XVBF16GER2PN, + XVBF16GER2PP, + XVF16GER2NN, + XVF16GER2NP, + XVF16GER2PN, + XVF16GER2PP, + XVF32GERNN, + XVF32GERNP, + XVF32GERPN, + XVF32GERPP, + XVF64GERNN, + XVF64GERNP, + XVF64GERPN, + XVF64GERPP, + XVI16GER2PP, + XVI16GER2SPP, + XVI4GER8PP, + XVI8GER4PP, + XVI8GER4SPP +)>; + +// 10 Cycles SIMD Matrix Multiply Engine operations, 4 input operands +def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, 
P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read], + (instrs + PMXVF32GER, + PMXVF64GER +)>; + +// 10 Cycles SIMD Matrix Multiply Engine operations, 5 input operands +def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read], + (instrs + PMXVBF16GER2, + PMXVF16GER2, + PMXVF32GERNN, + PMXVF32GERNP, + PMXVF32GERPN, + PMXVF32GERPP, + PMXVF64GERNN, + PMXVF64GERNP, + PMXVF64GERPN, + PMXVF64GERPP, + PMXVI16GER2, + PMXVI16GER2S, + PMXVI4GER8, + PMXVI8GER4 +)>; + +// 10 Cycles SIMD Matrix Multiply Engine operations, 6 input operands +def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read], + (instrs + PMXVBF16GER2NN, + PMXVBF16GER2NP, + PMXVBF16GER2PN, + PMXVBF16GER2PP, + PMXVF16GER2NN, + PMXVF16GER2NP, + PMXVF16GER2PN, + PMXVF16GER2PP, + PMXVI16GER2PP, + PMXVI16GER2SPP, + PMXVI4GER8PP, + PMXVI8GER4PP, + PMXVI8GER4SPP +)>; + +// 2-way crack instructions +// 10 Cycles SIMD Matrix Multiply Engine operations, and 3 Cycles ALU operations, 1 input operands +def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10W_FX_3C], + (instrs + XXMTACC +)>; + +// 4-way crack instructions +// 10 Cycles SIMD Matrix Multiply Engine operations, 3 Cycles ALU operations, 10 Cycles SIMD Matrix Multiply Engine operations, and 3 Cycles ALU operations, 1 input operands +def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10W_FX_3C, P10W_MM_10C, P10W_DISP_PAIR, P10W_FX_3C], + (instrs + XXMFACC +)>; + +// 5 Cycles GPR Multiply operations, 2 input operands +def : InstRW<[P10W_MU_5C, P10W_DISP_ANY, P10MU_Read, P10MU_Read], + (instrs + MULHD, + MULHDU, + MULHW, + MULHWU, + MULLD, + MULLDO, + MULLI, MULLI8, + MULLW, + MULLWO, + VMULHSD, + VMULHUD, + VMULLD +)>; + +// 5 Cycles GPR Multiply operations, 3 input operands +def : InstRW<[P10W_MU_5C, P10W_DISP_ANY, P10MU_Read, P10MU_Read, P10MU_Read], + (instrs + MADDHD, + MADDHDU, + MADDLD, MADDLD8 +)>; + +// 2-way crack instructions +// 5 Cycles GPR Multiply operations, and 3 
Cycles ALU operations, 2 input operands +def : InstRW<[P10W_MU_5C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY], + (instrs + MULHD_rec, + MULHDU_rec, + MULHW_rec, + MULHWU_rec, + MULLD_rec, + MULLDO_rec, + MULLW_rec, + MULLWO_rec +)>; + +// 4 Cycles Permute operations, 0 input operands +def : InstRW<[P10W_PM_4C, P10W_DISP_ANY], + (instrs + VSPLTISW, V_SETALLONES, V_SETALLONESB, V_SETALLONESH +)>; + +// 4 Cycles Permute operations, 1 input operands +def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read], + (instrs + LVSL, + LVSR, + MFVSRLD, + MTVSRWS, + VCLZLSBB, + VCTZLSBB, + VGBBD, + VPRTYBQ, + VSPLTISB, + VSPLTISH, + VSTRIBL, + VSTRIBR, + VSTRIHL, + VSTRIHR, + VUPKHPX, + VUPKHSB, + VUPKHSH, + VUPKHSW, + VUPKLPX, + VUPKLSB, + VUPKLSH, + VUPKLSW, + XVCVBF16SPN, + XXBRD, + XXBRH, + XXBRQ, + XXBRW, + XXSPLTIB +)>; + +// 4 Cycles Permute operations, 2 input operands +def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read, P10PM_Read], + (instrs + BPERMD, + MTVSRDD, + VBPERMD, + VBPERMQ, + VCLRLB, + VCLRRB, + VEXTRACTD, + VEXTRACTUB, + VEXTRACTUH, + VEXTRACTUW, + VEXTUBLX, + VEXTUBRX, + VEXTUHLX, + VEXTUHRX, + VEXTUWLX, + VEXTUWRX, + VINSERTD, + VINSERTW, + VMRGHB, + VMRGHH, + VMRGHW, + VMRGLB, + VMRGLH, + VMRGLW, + VPKPX, + VPKSDSS, + VPKSDUS, + VPKSHSS, + VPKSHUS, + VPKSWSS, + VPKSWUS, + VPKUDUM, + VPKUDUS, + VPKUHUM, + VPKUHUS, + VPKUWUM, + VPKUWUS, + VSL, + VSLO, + VSLV, + VSPLTB, VSPLTBs, + VSPLTH, VSPLTHs, + VSPLTW, + VSR, + VSRO, + VSRV, + XXEXTRACTUW, + XXGENPCVDM, + XXGENPCVHM, + XXGENPCVWM, + XXMRGHW, + XXMRGLW, + XXPERM, + XXPERMDI, XXPERMDIs, + XXPERMR, + XXSLDWI, XXSLDWIs, + XXSPLTW, XXSPLTWs +)>; + +// 4 Cycles Permute operations, 3 input operands +def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read, P10PM_Read, P10PM_Read], + (instrs + VEXTDDVLX, + VEXTDDVRX, + VEXTDUBVLX, + VEXTDUBVRX, + VEXTDUHVLX, + VEXTDUHVRX, + VEXTDUWVLX, + VEXTDUWVRX, + VINSBLX, + VINSBRX, + VINSBVLX, + VINSBVRX, + VINSD, + VINSDLX, + VINSDRX, + VINSERTB, + VINSERTH, + VINSHLX, + 
VINSHRX, + VINSHVLX, + VINSHVRX, + VINSW, + VINSWLX, + VINSWRX, + VINSWVLX, + VINSWVRX, + VPERM, + VPERMR, + VPERMXOR, + VSLDBI, + VSLDOI, + VSRDBI, + XXINSERTW +)>; + +// 2-way crack instructions +// 4 Cycles Permute operations, and 7 Cycles VMX Multiply operations, 2 input operands +def : InstRW<[P10W_PM_4C, P10W_DISP_EVEN, P10W_vMU_7C, P10W_DISP_ANY], + (instrs + VSUMSWS +)>; + +// 4 Cycles Permute operations, 1 input operands +def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read], + (instrs + XXSPLTIDP, + XXSPLTIW +)>; + +// 4 Cycles Permute operations, 3 input operands +def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read], + (instrs + XXBLENDVB, + XXBLENDVD, + XXBLENDVH, + XXBLENDVW, + XXSPLTI32DX +)>; + +// 4 Cycles Permute operations, 4 input operands +def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read, P10PM_Read], + (instrs + XXEVAL, + XXPERMX +)>; + +// 3 Cycles Store operations, 1 input operands +def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read], + (instrs + DCBST, + DCBZ, + ICBI +)>; + +// 3 Cycles Store operations, 2 input operands +def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read], + (instrs + DCBF, + PSTXVP, PSTXVPpc, + STB, STB8, + STBU, STBU8, + STBUX, STBUX8, + SPILLTOVSR_ST, STD, + STDBRX, + STDU, + STDUX, + DFSTOREf32, DFSTOREf64, STFD, + STFDU, + STFDUX, + STFDX, + STFIWX, STIWX, + STFS, + STFSU, + STFSUX, + STFSX, + STH, STH8, + STHBRX, + STHU, STHU8, + STHUX, STHUX8, + STVEBX, + STVEHX, + STVEWX, + STVX, + STVXL, + STW, STW8, + STWBRX, + STWU, STWU8, + STWUX, STWUX8, + STXSD, + STXSDX, + STXSIBX, STXSIBXv, + STXSIHX, STXSIHXv, + STXSIWX, + STXSSP, + STXSSPX, + STXV, + STXVB16X, + STXVD2X, + STXVH8X, + STXVRBX, + STXVRDX, + STXVRHX, + STXVRWX, + STXVW4X, + STXVX +)>; + +// 3 Cycles Store operations, 3 input operands +def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read], + (instrs + CP_COPY, CP_COPY8, + STBX, STBX8, STBXTLS, STBXTLS_, STBXTLS_32, + 
SPILLTOVSR_STX, STDX, STDXTLS, STDXTLS_, + STHX, STHX8, STHXTLS, STHXTLS_, STHXTLS_32, + STWX, STWX8, STWXTLS, STWXTLS_, STWXTLS_32, + STXVL, + STXVLL +)>; + +// Single crack instructions +// 3 Cycles Store operations, 0 input operands +def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY], + (instrs + EnforceIEIO, + MSGSYNC, + SLBSYNC, + TCHECK, + TLBSYNC +)>; + +// Single crack instructions +// 3 Cycles Store operations, 1 input operands +def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read], + (instrs + TEND +)>; + +// Single crack instructions +// 3 Cycles Store operations, 2 input operands +def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read], + (instrs + SLBIEG, + STBCX, + STDCX, + STHCX, + STWCX, + TLBIE +)>; + +// Single crack instructions +// 3 Cycles Store operations, 3 input operands +def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read], + (instrs + CP_PASTE8_rec, CP_PASTE_rec, + STBCIX, + STDCIX, + STHCIX, + STWCIX +)>; + +// 2-way crack instructions +// 3 Cycles Store operations, and 3 Cycles ALU operations, 0 input operands +def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY], + (instrs + ISYNC +)>; + +// 2-way crack instructions +// 3 Cycles Store operations, and 3 Cycles ALU operations, 1 input operands +def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY], + (instrs + SYNC +)>; + +// Expand instructions +// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 6 Cycles Load operations, and 3 Cycles Store operations, 2 input operands +def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY], + (instrs + LDAT, + LWAT +)>; + +// 4-way crack instructions +// 3 
Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, and 3 Cycles Store operations, 3 input operands +def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY], + (instrs + STDAT, + STWAT +)>; + +// Expand instructions +// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands +def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read], + (instrs + STMW +)>; + +// Expand instructions +// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 3 input operands +def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read], + (instrs + STSWI +)>; + +// 3 Cycles Store operations, 2 input operands +def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10ST_Read, P10ST_Read], + (instrs + PSTB, PSTB8, PSTB8pc, PSTBpc, + PSTD, PSTDpc, + PSTFD, PSTFDpc, + PSTFS, PSTFSpc, + PSTH, PSTH8, PSTH8pc, PSTHpc, + PSTW, PSTW8, PSTW8pc, PSTWpc, + PSTXSD, PSTXSDpc, + PSTXSSP, PSTXSSPpc, + PSTXV, PSTXVpc +)>; + +// 2-way crack instructions +// 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands +def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10W_ST_3C, P10ST_Read, P10ST_Read], + (instrs + STXVP, + STXVPX +)>; + +// FIXME - Miss scheduling information from datasheet +// Temporary set it as 1 Cycles Simple Fixed-point (SFX) operations, 0 input operands +def : InstRW<[P10W_SX, P10W_DISP_ANY], + (instrs + ATTN, + CP_ABORT, + DCBA, + DCBI, + DCBZL, + DCCCI, + ICBLC, + ICBLQ, + ICBTLS, + ICCCI, + LA, + LDMX, + MFDCR, + MFPMR, + MFSRIN, + MSYNC, + MTDCR, + MTPMR, + MTSRIN, + NAP, + TLBIA, + TLBLD, + TLBLI, + TLBRE2, + TLBSX2, + TLBSX2D, + TLBWE2 +)>; + +// Single crack instructions +// 3 
Cycles Simple Fixed-point (SFX) operations, 0 input operands +def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY], + (instrs + CLRBHRB, + MFMSR +)>; + +// Single crack instructions +// 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands +def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10SX_Read], + (instrs + MFTB +)>; + +// Single crack instructions +// 3 Cycles Simple Fixed-point (SFX) operations, 2 input operands +def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10SX_Read, P10SX_Read], + (instrs + MFBHRBE, + MTMSR, + MTMSRD +)>; + +// 2-way crack instructions +// 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 1 input operands +def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY], + (instrs + ADDPCIS +)>; + +// 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands +def : InstRW<[P10W_SX_3C, P10W_DISP_PAIR, P10SX_Read], + (instrs + PADDI, PADDI8, PADDI8pc, PADDIpc, PLI, PLI8 +)>; + +// 7 Cycles VMX Multiply operations, 2 input operands +def : InstRW<[P10W_vMU_7C, P10W_DISP_ANY, P10vMU_Read, P10vMU_Read], + (instrs + VMULESB, + VMULESD, + VMULESH, + VMULESW, + VMULEUB, + VMULEUD, + VMULEUH, + VMULEUW, + VMULHSW, + VMULHUW, + VMULOSB, + VMULOSD, + VMULOSH, + VMULOSW, + VMULOUB, + VMULOUD, + VMULOUH, + VMULOUW, + VMULUWM, + VSUM2SWS, + VSUM4SBS, + VSUM4SHS, + VSUM4UBS +)>; + +// 7 Cycles VMX Multiply operations, 3 input operands +def : InstRW<[P10W_vMU_7C, P10W_DISP_ANY, P10vMU_Read, P10vMU_Read, P10vMU_Read], + (instrs + VMHADDSHS, + VMHRADDSHS, + VMLADDUHM, + VMSUMCUD, + VMSUMMBM, + VMSUMSHM, + VMSUMSHS, + VMSUMUBM, + VMSUMUDM, + VMSUMUHM, + VMSUMUHS +)>; diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -590,8 +590,7 @@ def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.P7Features>; def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.P8Features>; def : 
ProcessorModel<"pwr9", P9Model, ProcessorFeatures.P9Features>; -// No scheduler model yet. -def : ProcessorModel<"pwr10", P9Model, ProcessorFeatures.P10Features>; +def : ProcessorModel<"pwr10", P10Model, ProcessorFeatures.P10Features>; // No scheduler model for future CPU. def : ProcessorModel<"future", NoSchedModel, ProcessorFeatures.FutureFeatures>; diff --git a/llvm/lib/Target/PowerPC/PPCSchedPredicates.td b/llvm/lib/Target/PowerPC/PPCSchedPredicates.td new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/PowerPC/PPCSchedPredicates.td @@ -0,0 +1,294 @@ +//===--- PPCSchedPredicates.td - PowerPC Scheduling Preds -*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Automatically generated file, do not edit! +// +// This file defines scheduling predicate definitions that are used by the +// PowerPC subtargets. +//===----------------------------------------------------------------------===// +// Identify instructions that write BF pipelines with 7 cycles. 
+def P10W_BF_7C_Pred : MCSchedPredicate< + CheckOpcode<[FADD, + FADDS, + FADDS_rec, + FADD_rec, + FCFID, + FCFIDS, + FCFIDS_rec, + FCFIDU, + FCFIDUS, + FCFIDUS_rec, + FCFIDU_rec, + FCFID_rec, + FCTID, + FCTIDU, + FCTIDUZ, + FCTIDUZ_rec, + FCTIDU_rec, + FCTIDZ, + FCTIDZ_rec, + FCTID_rec, + FCTIW, + FCTIWU, + FCTIWUZ, + FCTIWUZ_rec, + FCTIWU_rec, + FCTIWZ, + FCTIWZ_rec, + FCTIW_rec, + FMADD, + FMADDS, + FMADDS_rec, + FMADD_rec, + FMSUB, + FMSUBS, + FMSUBS_rec, + FMSUB_rec, + FMUL, + FMULS, + FMULS_rec, + FMUL_rec, + FNMADD, + FNMADDS, + FNMADDS_rec, + FNMADD_rec, + FNMSUB, + FNMSUBS, + FNMSUBS_rec, + FNMSUB_rec, + FRE, + FRES, + FRES_rec, + FRE_rec, + FRIMD, FRIMS, + FRIMD_rec, FRIMS_rec, + FRIND, FRINS, + FRIND_rec, FRINS_rec, + FRIPD, FRIPS, + FRIPD_rec, FRIPS_rec, + FRIZD, FRIZS, + FRIZD_rec, FRIZS_rec, + FRSP, + FRSP_rec, + FRSQRTE, + FRSQRTES, + FRSQRTES_rec, + FRSQRTE_rec, + FSELD, FSELS, + FSELD_rec, FSELS_rec, + FSUB, + FSUBS, + FSUBS_rec, + FSUB_rec, + VADDFP, + VCFSX, VCFSX_0, + VCFUX, VCFUX_0, + VCTSXS, VCTSXS_0, + VCTUXS, VCTUXS_0, + VEXPTEFP, + VEXPTEFP, + VLOGEFP, + VMADDFP, + VNMSUBFP, + VREFP, + VRFIM, + VRFIN, + VRFIP, + VRFIZ, + VRSQRTEFP, + VSUBFP, + XSADDDP, + XSADDSP, + XSCVDPHP, + XSCVDPSP, + XSCVDPSPN, + XSCVDPSXDS, XSCVDPSXDSs, + XSCVDPSXWS, XSCVDPSXWSs, + XSCVDPUXDS, XSCVDPUXDSs, + XSCVDPUXWS, XSCVDPUXWSs, + XSCVSPDP, + XSCVSXDDP, + XSCVSXDSP, + XSCVUXDDP, + XSCVUXDSP, + XSMADDADP, + XSMADDASP, + XSMADDMDP, + XSMADDMSP, + XSMSUBADP, + XSMSUBASP, + XSMSUBMDP, + XSMSUBMSP, + XSMULDP, + XSMULSP, + XSNMADDADP, + XSNMADDASP, + XSNMADDMDP, + XSNMADDMSP, + XSNMSUBADP, + XSNMSUBASP, + XSNMSUBMDP, + XSNMSUBMSP, + XSRDPI, + XSRDPIC, + XSRDPIM, + XSRDPIP, + XSRDPIZ, + XSREDP, + XSRESP, + XSRSP, + XSRSQRTEDP, + XSRSQRTESP, + XSSUBDP, + XSSUBSP, + XVADDDP, + XVADDSP, + XVCVDPSP, + XVCVDPSXDS, + XVCVDPSXWS, + XVCVDPUXDS, + XVCVDPUXWS, + XVCVSPBF16, + XVCVSPDP, + XVCVSPHP, + XVCVSPSXDS, + XVCVSPSXWS, + XVCVSPUXDS, + XVCVSPUXWS, + XVCVSXDDP, + XVCVSXDSP, + 
XVCVSXWDP, + XVCVSXWSP, + XVCVUXDDP, + XVCVUXDSP, + XVCVUXWDP, + XVCVUXWSP, + XVMADDADP, + XVMADDASP, + XVMADDMDP, + XVMADDMSP, + XVMSUBADP, + XVMSUBASP, + XVMSUBMDP, + XVMSUBMSP, + XVMULDP, + XVMULSP, + XVNMADDADP, + XVNMADDASP, + XVNMADDMDP, + XVNMADDMSP, + XVNMSUBADP, + XVNMSUBASP, + XVNMSUBMDP, + XVNMSUBMSP, + XVRDPI, + XVRDPIC, + XVRDPIM, + XVRDPIP, + XVRDPIZ, + XVREDP, + XVRESP, + XVRSPI, + XVRSPIC, + XVRSPIM, + XVRSPIP, + XVRSPIZ, + XVRSQRTEDP, + XVRSQRTESP, + XVSUBDP, + XVSUBSP]> +>; + +// Identify instructions that write CY pipelines with 7 cycles. +def P10W_CY_7C_Pred : MCSchedPredicate< + CheckOpcode<[CFUGED, + CNTLZDM, + CNTTZDM, + PDEPD, + PEXTD, + VCFUGED, + VCIPHER, + VCIPHERLAST, + VCLZDM, + VCTZDM, + VGNB, + VNCIPHER, + VNCIPHERLAST, + VPDEPD, + VPEXTD, + VPMSUMB, + VPMSUMD, + VPMSUMH, + VPMSUMW, + VSBOX]> +>; + +// Identify instructions that write MM pipelines with 10 cycles. +def P10W_MM_10C_Pred : MCSchedPredicate< + CheckOpcode<[PMXVBF16GER2, + PMXVBF16GER2NN, + PMXVBF16GER2NP, + PMXVBF16GER2PN, + PMXVBF16GER2PP, + PMXVF16GER2, + PMXVF16GER2NN, + PMXVF16GER2NP, + PMXVF16GER2PN, + PMXVF16GER2PP, + PMXVF32GER, + PMXVF32GERNN, + PMXVF32GERNP, + PMXVF32GERPN, + PMXVF32GERPP, + PMXVF64GER, + PMXVF64GERNN, + PMXVF64GERNP, + PMXVF64GERPN, + PMXVF64GERPP, + PMXVI16GER2, + PMXVI16GER2PP, + PMXVI16GER2S, + PMXVI16GER2SPP, + PMXVI4GER8, + PMXVI4GER8PP, + PMXVI8GER4, + PMXVI8GER4PP, + PMXVI8GER4SPP, + XVBF16GER2, + XVBF16GER2NN, + XVBF16GER2NP, + XVBF16GER2PN, + XVBF16GER2PP, + XVF16GER2, + XVF16GER2NN, + XVF16GER2NP, + XVF16GER2PN, + XVF16GER2PP, + XVF32GER, + XVF32GERNN, + XVF32GERNP, + XVF32GERPN, + XVF32GERPP, + XVF64GER, + XVF64GERNN, + XVF64GERNP, + XVF64GERPN, + XVF64GERPP, + XVI16GER2, + XVI16GER2PP, + XVI16GER2S, + XVI16GER2SPP, + XVI4GER8, + XVI4GER8PP, + XVI8GER4, + XVI8GER4PP, + XVI8GER4SPP, + XXMFACC, + XXMFACC, + XXMTACC, + XXSETACCZ]> +>; diff --git a/llvm/lib/Target/PowerPC/PPCSchedule.td b/llvm/lib/Target/PowerPC/PPCSchedule.td --- 
a/llvm/lib/Target/PowerPC/PPCSchedule.td +++ b/llvm/lib/Target/PowerPC/PPCSchedule.td @@ -128,7 +128,9 @@ //===----------------------------------------------------------------------===// // Processor instruction itineraries. +include "PPCInstrInfo.td" +include "PPCSchedPredicates.td" include "PPCScheduleG3.td" include "PPCSchedule440.td" include "PPCScheduleG4.td" @@ -137,6 +139,7 @@ include "PPCScheduleP7.td" include "PPCScheduleP8.td" include "PPCScheduleP9.td" +include "PPCScheduleP10.td" include "PPCScheduleA2.td" include "PPCScheduleE500.td" include "PPCScheduleE500mc.td" diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP10.td b/llvm/lib/Target/PowerPC/PPCScheduleP10.td new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/PowerPC/PPCScheduleP10.td @@ -0,0 +1,416 @@ +//===--- PPCScheduleP10.td - P10 Scheduling Definitions -*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Automatically generated file, do not edit! +// +// This file defines the resources required by P10 instructions. +//===----------------------------------------------------------------------===// +// Modeling pipeline forwarding logic. +def P10BR_Read : SchedRead; +def P10DF_Read : SchedRead; +def P10DV_Read : SchedRead; +def P10DX_Read : SchedRead; +def P10F2_Read : SchedRead; +def P10FX_Read : SchedRead; +def P10LD_Read : SchedRead; +def P10MU_Read : SchedRead; +def P10PM_Read : SchedRead; +def P10ST_Read : SchedRead; +def P10SX_Read : SchedRead; +def P10vMU_Read : SchedRead; + +def P10Model : SchedMachineModel { + let IssueWidth = 8; + + // TODO - Need to be updated according to P10 UM. + let MicroOpBufferSize = 44; + + // TODO - tune this on real HW once it arrives. For now, we will use the same + // value as we do on P9. 
+ let LoopMicroOpBufferSize = 60; + + let CompleteModel = 1; + + // Do not support SPE (Signal Processing Engine) on Power 10. + let UnsupportedFeatures = [HasSPE, IsE500, IsBookE]; +} + +let SchedModel = P10Model in { + + // ***************** Processor Resources ***************** + + // Pipeline Groups + + def P10_BF : ProcResource<4>; // Four Binary Floating Point pipelines. + def P10_BR : ProcResource<2>; // Two Branch pipelines. + def P10_CY : ProcResource<4>; // Four Crypto pipelines. + def P10_DF : ProcResource<1>; // One Decimal Floating Point pipeline. + def P10_DV : ProcResource<2>; // Two Fixed-point divide (DIV) pipelines. + def P10_DX : ProcResource<2>; // Two 128-bit fixed-point and BCD pipelines. + def P10_FX : ProcResource<4>; // Four ALU pipelines. + def P10_LD : ProcResource<2>; // Two Load pipelines. + def P10_MM : ProcResource<2>; // Two 512-bit SIMD matrix multiply engine pipelines. + def P10_PM : ProcResource<4>; // Four 128-bit permute (PM) pipelines. + def P10_ST : ProcResource<2>; // Two ST-D pipelines. + def P10_SX : ProcResource<2>; // Two Simple Fixed-point (SFX) pipelines. + + // Dispatch Groups + + // Dispatch to any slots + def P10_ANY_SLOT : ProcResource<8>; + + let Super = P10_ANY_SLOT in { + + // Dispatch to even slots + def P10_EVEN_SLOT : ProcResource<4>; + + // Dispatch to odd slots + def P10_ODD_SLOT : ProcResource<4>; + } + + // Dispatch Rules + let NumMicroOps = 0, Latency = 1 in { + // Dispatch Rule '-' + def P10W_DISP_ANY : SchedWriteRes<[P10_ANY_SLOT]>; + + // Dispatch Rule '-', even slot + def P10W_DISP_EVEN : SchedWriteRes<[P10_EVEN_SLOT]>; + + // Dispatch Rule 'P' + def P10W_DISP_PAIR : SchedWriteRes<[P10_EVEN_SLOT, P10_ODD_SLOT]>; + } + + // ***************** SchedWriteRes Definitions ***************** + + // A BF pipeline may take from 7 to 36 cycles to complete. + // Some BF operations may keep the pipeline busy for up to 10 cycles.
+ def P10W_BF_7C : SchedWriteRes<[P10_BF]> { + let Latency = 7; + } + + def P10W_BF_22C : SchedWriteRes<[P10_BF]> { + let ResourceCycles = [ 5 ]; + let Latency = 22; + } + + def P10W_BF_24C : SchedWriteRes<[P10_BF]> { + let ResourceCycles = [ 8 ]; + let Latency = 24; + } + + def P10W_BF_26C : SchedWriteRes<[P10_BF]> { + let ResourceCycles = [ 5 ]; + let Latency = 26; + } + + def P10W_BF_27C : SchedWriteRes<[P10_BF]> { + let ResourceCycles = [ 7 ]; + let Latency = 27; + } + + def P10W_BF_36C : SchedWriteRes<[P10_BF]> { + let ResourceCycles = [ 10 ]; + let Latency = 36; + } + + // A BR pipeline may take 2 cycles to complete. + def P10W_BR_2C : SchedWriteRes<[P10_BR]> { + let Latency = 2; + } + + // A CY pipeline may take 7 cycles to complete. + def P10W_CY_7C : SchedWriteRes<[P10_CY]> { + let Latency = 7; + } + + // A DF pipeline may take from 13 to 174 cycles to complete. + // Some DF operations may keep the pipeline busy for up to 67 cycles. + def P10W_DF_13C : SchedWriteRes<[P10_DF]> { + let Latency = 13; + } + + def P10W_DF_24C : SchedWriteRes<[P10_DF]> { + let ResourceCycles = [ 16 ]; + let Latency = 24; + } + + def P10W_DF_25C : SchedWriteRes<[P10_DF]> { + let ResourceCycles = [ 17 ]; + let Latency = 25; + } + + def P10W_DF_26C : SchedWriteRes<[P10_DF]> { + let ResourceCycles = [ 18 ]; + let Latency = 26; + } + + def P10W_DF_32C : SchedWriteRes<[P10_DF]> { + let ResourceCycles = [ 22 ]; + let Latency = 32; + } + + def P10W_DF_33C : SchedWriteRes<[P10_DF]> { + let ResourceCycles = [ 25 ]; + let Latency = 33; + } + + def P10W_DF_34C : SchedWriteRes<[P10_DF]> { + let ResourceCycles = [ 25 ]; + let Latency = 34; + } + + def P10W_DF_38C : SchedWriteRes<[P10_DF]> { + let ResourceCycles = [ 30 ]; + let Latency = 38; + } + + def P10W_DF_40C : SchedWriteRes<[P10_DF]> { + let ResourceCycles = [ 17 ]; + let Latency = 40; + } + + def P10W_DF_43C : SchedWriteRes<[P10_DF]> { + let ResourceCycles = [ 34 ]; + let Latency = 43; + } + + def P10W_DF_59C : SchedWriteRes<[P10_DF]> 
{ + let ResourceCycles = [ 49 ]; + let Latency = 59; + } + + def P10W_DF_61C : SchedWriteRes<[P10_DF]> { + let ResourceCycles = [ 12 ]; + let Latency = 61; + } + + def P10W_DF_68C : SchedWriteRes<[P10_DF]> { + let ResourceCycles = [ 15 ]; + let Latency = 68; + } + + def P10W_DF_77C : SchedWriteRes<[P10_DF]> { + let ResourceCycles = [ 67 ]; + let Latency = 77; + } + + def P10W_DF_87C : SchedWriteRes<[P10_DF]> { + let ResourceCycles = [ 12 ]; + let Latency = 87; + } + + def P10W_DF_100C : SchedWriteRes<[P10_DF]> { + let ResourceCycles = [ 32 ]; + let Latency = 100; + } + + def P10W_DF_174C : SchedWriteRes<[P10_DF]> { + let ResourceCycles = [ 33 ]; + let Latency = 174; + } + + // A DV pipeline may take from 20 to 83 cycles to complete. + // Some DV operations may keep the pipeline busy for up to 33 cycles. + def P10W_DV_20C : SchedWriteRes<[P10_DV]> { + let ResourceCycles = [ 10 ]; + let Latency = 20; + } + + def P10W_DV_25C : SchedWriteRes<[P10_DV]> { + let ResourceCycles = [ 10 ]; + let Latency = 25; + } + + def P10W_DV_27C : SchedWriteRes<[P10_DV]> { + let ResourceCycles = [ 10 ]; + let Latency = 27; + } + + def P10W_DV_41C : SchedWriteRes<[P10_DV]> { + let ResourceCycles = [ 10 ]; + let Latency = 41; + } + + def P10W_DV_43C : SchedWriteRes<[P10_DV]> { + let ResourceCycles = [ 21 ]; + let Latency = 43; + } + + def P10W_DV_47C : SchedWriteRes<[P10_DV]> { + let ResourceCycles = [ 21 ]; + let Latency = 47; + } + + def P10W_DV_54C : SchedWriteRes<[P10_DV]> { + let ResourceCycles = [ 33 ]; + let Latency = 54; + } + + def P10W_DV_60C : SchedWriteRes<[P10_DV]> { + let ResourceCycles = [ 33 ]; + let Latency = 60; + } + + def P10W_DV_75C : SchedWriteRes<[P10_DV]> { + let ResourceCycles = [ 21 ]; + let Latency = 75; + } + + def P10W_DV_83C : SchedWriteRes<[P10_DV]> { + let ResourceCycles = [ 33 ]; + let Latency = 83; + } + + // A DX pipeline may take 5 cycles to complete. 
+ def P10W_DX_5C : SchedWriteRes<[P10_DX]> { + let Latency = 5; + } + + // A F2 pipeline may take 4 cycles to complete. + def P10W_F2_4C : SchedWriteRes<[P10_FX]> { + let Latency = 4; + } + + // A FX pipeline may take from 2 to 3 cycles to complete. + def P10W_FX_2C : SchedWriteRes<[P10_FX]> { + let Latency = 2; + } + + def P10W_FX_3C : SchedWriteRes<[P10_FX]> { + let Latency = 3; + } + + // A LD pipeline may take 6 cycles to complete. + def P10W_LD_6C : SchedWriteRes<[P10_LD]> { + let Latency = 6; + } + + // A MF pipeline may take 13 cycles to complete. + def P10W_MF_13C : SchedWriteRes<[P10_SX]> { + let Latency = 13; + } + + // A MFL pipeline may take 13 cycles to complete. + def P10W_MFL_13C : SchedWriteRes<[P10_SX]> { + let Latency = 13; + } + + // A MM pipeline may take 10 cycles to complete. + def P10W_MM_10C : SchedWriteRes<[P10_MM]> { + let Latency = 10; + } + + // A MU pipeline may take 5 cycles to complete. + def P10W_MU_5C : SchedWriteRes<[P10_BF]> { + let Latency = 5; + } + + // A PM pipeline may take 4 cycles to complete. + def P10W_PM_4C : SchedWriteRes<[P10_PM]> { + let Latency = 4; + } + + // A ST pipeline may take 3 cycles to complete. + def P10W_ST_3C : SchedWriteRes<[P10_ST]> { + let Latency = 3; + } + + // A SX pipeline may take from 0 to 3 cycles to complete. + def P10W_SX : SchedWriteRes<[P10_SX]> { + let Latency = 0; + } + + def P10W_SX_3C : SchedWriteRes<[P10_SX]> { + let Latency = 3; + } + + // A vMU pipeline may take 7 cycles to complete. + def P10W_vMU_7C : SchedWriteRes<[P10_BF]> { + let Latency = 7; + } + + // ***************** Read Advance Definitions ***************** + + // Modeling pipeline forwarding logic. 
+ def P10BF_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>; + def P10BF_Read_2C : SchedReadAdvance<2, [P10W_BF_7C]>; + def P10BR_Read_1C : SchedReadAdvance<1, [P10W_FX_3C, P10W_F2_4C]>; + def P10CY_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_DF_13C, P10W_MM_10C]>; + def P10CY_Read_3C : SchedReadAdvance<3, [P10W_CY_7C]>; + def P10DF_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>; + def P10DV_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>; + def P10DX_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>; + def P10F2_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>; + def P10FX_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>; + def P10LD_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C]>; + def P10MM_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C]>; + def P10MM_Read_6C : SchedReadAdvance<6, [P10W_MM_10C]>; + def P10MU_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_DF_13C]>; + def P10PM_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>; + def P10ST_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>; + def P10SX_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C, P10W_MM_10C]>; + def P10vMU_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>; + + // Save 1 cycles if pipeline BF reads the data from pipelines DX, MU, vMU, CY, DF, MM. 
+ // Save 2 cycles if pipeline BF reads the data from pipelines BF. + def P10BF_Read : SchedReadVariant<[ + SchedVar<P10W_BF_7C_Pred, [P10BF_Read_2C]>, + SchedVar<NoSchedPred, [P10BF_Read_1C]> + ]>; + + // Save 1 cycles if pipeline CY reads the data from pipelines DX, MU, vMU, BF, DF, MM. + // Save 3 cycles if pipeline CY reads the data from pipelines CY. + def P10CY_Read : SchedReadVariant<[ + SchedVar<P10W_CY_7C_Pred, [P10CY_Read_3C]>, + SchedVar<NoSchedPred, [P10CY_Read_1C]> + ]>; + + // Save 1 cycles if pipeline MM reads the data from pipelines DX, MU, vMU, BF, CY, DF. + // Save 6 cycles if pipeline MM reads the data from pipelines MM. + def P10MM_Read : SchedReadVariant<[ + SchedVar<P10W_MM_10C_Pred, [P10MM_Read_6C]>, + SchedVar<NoSchedPred, [P10MM_Read_1C]> + ]>; + + // Save 1 cycles if pipeline BR reads the data from pipelines FX, F2. + def : SchedAlias<P10BR_Read, P10BR_Read_1C>; + + // Save 1 cycles if pipeline DF reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM. + def : SchedAlias<P10DF_Read, P10DF_Read_1C>; + + // Save 1 cycles if pipeline DV reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM. + def : SchedAlias<P10DV_Read, P10DV_Read_1C>; + + // Save 1 cycles if pipeline DX reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM. + def : SchedAlias<P10DX_Read, P10DX_Read_1C>; + + // Save 1 cycles if pipeline F2 reads the data from pipelines ST, SX, FX, F2, PM. + def : SchedAlias<P10F2_Read, P10F2_Read_1C>; + + // Save 1 cycles if pipeline FX reads the data from pipelines ST, SX, FX, F2, PM. + def : SchedAlias<P10FX_Read, P10FX_Read_1C>; + + // Save 1 cycles if pipeline LD reads the data from pipelines ST, SX, FX, F2. + def : SchedAlias<P10LD_Read, P10LD_Read_1C>; + + // Save 1 cycles if pipeline MU reads the data from pipelines DX, MU, DF. + def : SchedAlias<P10MU_Read, P10MU_Read_1C>; + + // Save 1 cycles if pipeline PM reads the data from pipelines ST, SX, FX, F2, PM. + def : SchedAlias<P10PM_Read, P10PM_Read_1C>; + + // Save 1 cycles if pipeline ST reads the data from pipelines ST, SX, FX, F2, PM. + def : SchedAlias<P10ST_Read, P10ST_Read_1C>; + + // Save 1 cycles if pipeline SX reads the data from pipelines ST, SX, FX, F2, PM, MM. + def : SchedAlias<P10SX_Read, P10SX_Read_1C>; + + // Save 1 cycles if pipeline vMU reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
+ def : SchedAlias<P10vMU_Read, P10vMU_Read_1C>; + + include "P10InstrResources.td" +} diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/llvm/lib/Target/PowerPC/PPCScheduleP9.td --- a/llvm/lib/Target/PowerPC/PPCScheduleP9.td +++ b/llvm/lib/Target/PowerPC/PPCScheduleP9.td @@ -9,8 +9,6 @@ // This file defines the itinerary class data for the POWER9 processor. // //===----------------------------------------------------------------------===// -include "PPCInstrInfo.td" - def P9Model : SchedMachineModel { // The maximum number of instructions to be issued at the same time. // While a value of 8 is technically correct since 8 instructions can be diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll --- a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll +++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll @@ -105,8 +105,8 @@ ; ; CHECK-BE-LABEL: vec_xst_trunc_ss: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 ; CHECK-BE-NEXT: sldi r3, r5, 1 +; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 ; CHECK-BE-NEXT: stxsihx v2, r6, r3 ; CHECK-BE-NEXT: blr ; @@ -136,8 +136,8 @@ ; ; CHECK-BE-LABEL: vec_xst_trunc_us: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 ; CHECK-BE-NEXT: sldi r3, r5, 1 +; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 ; CHECK-BE-NEXT: stxsihx v2, r6, r3 ; CHECK-BE-NEXT: blr ; @@ -167,8 +167,8 @@ ; ; CHECK-BE-LABEL: vec_xst_trunc_si: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-BE-NEXT: sldi r3, r5, 2 +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-BE-NEXT: stfiwx f0, r6, r3 ; CHECK-BE-NEXT: blr ; @@ -198,8 +198,8 @@ ; ; CHECK-BE-LABEL: vec_xst_trunc_ui: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-BE-NEXT: sldi r3, r5, 2 +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-BE-NEXT: stfiwx f0, r6, r3 ; CHECK-BE-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/constant-pool.ll b/llvm/test/CodeGen/PowerPC/constant-pool.ll ---
a/llvm/test/CodeGen/PowerPC/constant-pool.ll +++ b/llvm/test/CodeGen/PowerPC/constant-pool.ll @@ -364,15 +364,15 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: .cfi_offset v31, -16 -; CHECK-NEXT: xxlxor f4, f4, f4 ; CHECK-NEXT: xxsplti32dx vs3, 0, 1074935889 +; CHECK-NEXT: xxlxor f4, f4, f4 ; CHECK-NEXT: stxv vs63, 32(r1) # 16-byte Folded Spill ; CHECK-NEXT: xxsplti32dx vs63, 0, 1074935889 ; CHECK-NEXT: xxsplti32dx vs3, 1, -343597384 ; CHECK-NEXT: # kill: def $f3 killed $f3 killed $vsl3 ; CHECK-NEXT: bl __gcc_qadd@notoc -; CHECK-NEXT: xxlxor f4, f4, f4 ; CHECK-NEXT: xxsplti32dx vs3, 0, 1074935889 +; CHECK-NEXT: xxlxor f4, f4, f4 ; CHECK-NEXT: xxsplti32dx vs3, 1, -1719329096 ; CHECK-NEXT: # kill: def $f3 killed $f3 killed $vsl3 ; CHECK-NEXT: bl __gcc_qadd@notoc diff --git a/llvm/test/CodeGen/PowerPC/int128_ldst.ll b/llvm/test/CodeGen/PowerPC/int128_ldst.ll --- a/llvm/test/CodeGen/PowerPC/int128_ldst.ll +++ b/llvm/test/CodeGen/PowerPC/int128_ldst.ll @@ -420,8 +420,8 @@ ; CHECK-P10-NEXT: rldicr 4, 3, 0, 23 ; CHECK-P10-NEXT: pli 5, 232 ; CHECK-P10-NEXT: pli 3, 3567587329 -; CHECK-P10-NEXT: pli 6, 3567587337 ; CHECK-P10-NEXT: rldimi 3, 5, 32, 0 +; CHECK-P10-NEXT: pli 6, 3567587337 ; CHECK-P10-NEXT: rldimi 6, 5, 32, 0 ; CHECK-P10-NEXT: ldx 3, 4, 3 ; CHECK-P10-NEXT: ldx 4, 4, 6 @@ -465,8 +465,8 @@ ; CHECK-P10-NEXT: rldicr 4, 3, 0, 23 ; CHECK-P10-NEXT: pli 3, 244140625 ; CHECK-P10-NEXT: pli 5, 232 -; CHECK-P10-NEXT: pli 6, 3567587336 ; CHECK-P10-NEXT: rldic 3, 3, 12, 24 +; CHECK-P10-NEXT: pli 6, 3567587336 ; CHECK-P10-NEXT: rldimi 6, 5, 32, 0 ; CHECK-P10-NEXT: ldx 3, 4, 3 ; CHECK-P10-NEXT: ldx 4, 4, 6 @@ -584,8 +584,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: pli 4, 232 ; CHECK-P10-NEXT: pli 3, 3567587329 -; CHECK-P10-NEXT: pli 5, 3567587337 ; CHECK-P10-NEXT: rldimi 3, 4, 32, 0 +; CHECK-P10-NEXT: pli 5, 3567587337 ; CHECK-P10-NEXT: rldimi 5, 4, 32, 0 ; CHECK-P10-NEXT: ld 3, 0(3) ; CHECK-P10-NEXT: ld 4, 0(5) diff --git 
a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll --- a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll +++ b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll @@ -26,10 +26,10 @@ ; CHECK-NEXT: xxlor vs0, v2, v2 ; CHECK-NEXT: xxlor vs1, v3, v3 ; CHECK-NEXT: stxvp vsp34, 128(r1) # 32-byte Folded Spill -; CHECK-NEXT: ld r30, 272(r1) -; CHECK-NEXT: stxvp vsp36, 96(r1) # 32-byte Folded Spill ; CHECK-NEXT: xxlor vs2, v4, v4 ; CHECK-NEXT: xxlor vs3, v5, v5 +; CHECK-NEXT: ld r30, 272(r1) +; CHECK-NEXT: stxvp vsp36, 96(r1) # 32-byte Folded Spill ; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: xvf16ger2pp acc0, v2, v4 ; CHECK-NEXT: xxmfacc acc0 @@ -38,9 +38,9 @@ ; CHECK-NEXT: bl foo@notoc ; CHECK-NEXT: lxvp vsp0, 64(r1) ; CHECK-NEXT: lxvp vsp2, 32(r1) +; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxvp vsp34, 128(r1) # 32-byte Folded Reload ; CHECK-NEXT: lxvp vsp36, 96(r1) # 32-byte Folded Reload -; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: xvf16ger2pp acc0, v2, v4 ; CHECK-NEXT: xxmfacc acc0 ; CHECK-NEXT: stxv vs0, 48(r30) @@ -69,10 +69,10 @@ ; CHECK-BE-NEXT: xxlor vs0, v2, v2 ; CHECK-BE-NEXT: xxlor vs1, v3, v3 ; CHECK-BE-NEXT: stxvp vsp34, 208(r1) # 32-byte Folded Spill -; CHECK-BE-NEXT: ld r30, 368(r1) ; CHECK-BE-NEXT: xxlor vs2, v4, v4 ; CHECK-BE-NEXT: xxlor vs3, v5, v5 ; CHECK-BE-NEXT: stxvp vsp36, 176(r1) # 32-byte Folded Spill +; CHECK-BE-NEXT: ld r30, 368(r1) ; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v4 ; CHECK-BE-NEXT: xxmfacc acc0 @@ -82,9 +82,9 @@ ; CHECK-BE-NEXT: nop ; CHECK-BE-NEXT: lxvp vsp0, 112(r1) ; CHECK-BE-NEXT: lxvp vsp2, 144(r1) +; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxvp vsp34, 208(r1) # 32-byte Folded Reload ; CHECK-BE-NEXT: lxvp vsp36, 176(r1) # 32-byte Folded Reload -; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v4 ; CHECK-BE-NEXT: xxmfacc acc0 ; CHECK-BE-NEXT: stxv vs1, 16(r30) diff --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll 
--- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll @@ -409,29 +409,29 @@ ; CHECK-NEXT: xvf32gerpp acc2, vs0, vs1 ; CHECK-NEXT: lxv vs0, 32(r7) ; CHECK-NEXT: lxv vs1, 48(r7) -; CHECK-NEXT: xxmfacc acc2 ; CHECK-NEXT: xvf32gerpn acc1, vs0, vs1 ; CHECK-NEXT: lxv vs12, 64(r7) ; CHECK-NEXT: lxv vs13, 80(r7) -; CHECK-NEXT: rldic r7, r4, 6, 26 ; CHECK-NEXT: xxsetaccz acc0 +; CHECK-NEXT: rldic r7, r4, 6, 26 ; CHECK-NEXT: addi r4, r4, 3 -; CHECK-NEXT: xxmfacc acc1 +; CHECK-NEXT: add r8, r3, r7 +; CHECK-NEXT: xxmfacc acc2 ; CHECK-NEXT: xvf32gernp acc0, vs12, vs13 ; CHECK-NEXT: stxvx vs11, r3, r7 -; CHECK-NEXT: add r7, r3, r7 +; CHECK-NEXT: stxv vs8, 48(r8) +; CHECK-NEXT: xxmfacc acc1 +; CHECK-NEXT: stxv vs9, 32(r8) +; CHECK-NEXT: stxv vs10, 16(r8) +; CHECK-NEXT: stxv vs4, 112(r8) +; CHECK-NEXT: stxv vs5, 96(r8) ; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs8, 48(r7) -; CHECK-NEXT: stxv vs9, 32(r7) -; CHECK-NEXT: stxv vs10, 16(r7) -; CHECK-NEXT: stxv vs4, 112(r7) -; CHECK-NEXT: stxv vs5, 96(r7) -; CHECK-NEXT: stxv vs6, 80(r7) -; CHECK-NEXT: stxv vs7, 64(r7) -; CHECK-NEXT: stxv vs0, 176(r7) -; CHECK-NEXT: stxv vs1, 160(r7) -; CHECK-NEXT: stxv vs2, 144(r7) -; CHECK-NEXT: stxv vs3, 128(r7) +; CHECK-NEXT: stxv vs6, 80(r8) +; CHECK-NEXT: stxv vs7, 64(r8) +; CHECK-NEXT: stxv vs0, 176(r8) +; CHECK-NEXT: stxv vs1, 160(r8) +; CHECK-NEXT: stxv vs2, 144(r8) +; CHECK-NEXT: stxv vs3, 128(r8) ; CHECK-NEXT: bdnz .LBB9_2 ; CHECK-NEXT: # %bb.3: # %for.cond.cleanup ; CHECK-NEXT: blr @@ -458,29 +458,29 @@ ; CHECK-BE-NEXT: xvf32gerpp acc2, vs0, vs1 ; CHECK-BE-NEXT: lxv vs0, 32(r7) ; CHECK-BE-NEXT: lxv vs1, 48(r7) -; CHECK-BE-NEXT: xxmfacc acc2 ; CHECK-BE-NEXT: xvf32gerpn acc1, vs0, vs1 ; CHECK-BE-NEXT: lxv vs12, 64(r7) ; CHECK-BE-NEXT: lxv vs13, 80(r7) -; CHECK-BE-NEXT: rldic r7, r4, 6, 26 ; CHECK-BE-NEXT: xxsetaccz acc0 +; CHECK-BE-NEXT: rldic r7, r4, 6, 26 ; CHECK-BE-NEXT: addi r4, r4, 3 -; CHECK-BE-NEXT: xxmfacc acc1 +; CHECK-BE-NEXT: add r8, 
r3, r7 +; CHECK-BE-NEXT: xxmfacc acc2 ; CHECK-BE-NEXT: xvf32gernp acc0, vs12, vs13 ; CHECK-BE-NEXT: stxvx vs8, r3, r7 -; CHECK-BE-NEXT: add r7, r3, r7 +; CHECK-BE-NEXT: stxv vs9, 16(r8) +; CHECK-BE-NEXT: xxmfacc acc1 +; CHECK-BE-NEXT: stxv vs11, 48(r8) +; CHECK-BE-NEXT: stxv vs10, 32(r8) +; CHECK-BE-NEXT: stxv vs5, 80(r8) +; CHECK-BE-NEXT: stxv vs4, 64(r8) ; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs9, 16(r7) -; CHECK-BE-NEXT: stxv vs11, 48(r7) -; CHECK-BE-NEXT: stxv vs10, 32(r7) -; CHECK-BE-NEXT: stxv vs5, 80(r7) -; CHECK-BE-NEXT: stxv vs4, 64(r7) -; CHECK-BE-NEXT: stxv vs7, 112(r7) -; CHECK-BE-NEXT: stxv vs6, 96(r7) -; CHECK-BE-NEXT: stxv vs1, 144(r7) -; CHECK-BE-NEXT: stxv vs0, 128(r7) -; CHECK-BE-NEXT: stxv vs3, 176(r7) -; CHECK-BE-NEXT: stxv vs2, 160(r7) +; CHECK-BE-NEXT: stxv vs7, 112(r8) +; CHECK-BE-NEXT: stxv vs6, 96(r8) +; CHECK-BE-NEXT: stxv vs1, 144(r8) +; CHECK-BE-NEXT: stxv vs0, 128(r8) +; CHECK-BE-NEXT: stxv vs3, 176(r8) +; CHECK-BE-NEXT: stxv vs2, 160(r8) ; CHECK-BE-NEXT: bdnz .LBB9_2 ; CHECK-BE-NEXT: # %bb.3: # %for.cond.cleanup ; CHECK-BE-NEXT: blr @@ -671,8 +671,8 @@ ; CHECK-NEXT: lxv vs0, 48(r3) ; CHECK-NEXT: lxv vs3, 0(r3) ; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxvp vsp36, 0(r4) ; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: lxvp vsp36, 0(r4) ; CHECK-NEXT: xvf64gernp acc0, vsp36, v2 ; CHECK-NEXT: xxmfacc acc0 ; CHECK-NEXT: stxv vs0, 48(r7) @@ -687,8 +687,8 @@ ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxvp vsp36, 0(r4) ; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: lxvp vsp36, 0(r4) ; CHECK-BE-NEXT: xvf64gernp acc0, vsp36, v2 ; CHECK-BE-NEXT: xxmfacc acc0 ; CHECK-BE-NEXT: stxv vs1, 16(r7) @@ -715,8 +715,8 @@ ; CHECK-NEXT: lxv vs0, 48(r3) ; CHECK-NEXT: lxv vs3, 0(r3) ; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxvp vsp36, 0(r5) ; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: lxvp vsp36, 0(r5) ; CHECK-NEXT: xvf64gernp acc0, vsp36, v2 ; CHECK-NEXT: xxmfacc acc0 ; 
CHECK-NEXT: stxv vs0, 48(r9) @@ -731,8 +731,8 @@ ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxvp vsp36, 0(r5) ; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: lxvp vsp36, 0(r5) ; CHECK-BE-NEXT: xvf64gernp acc0, vsp36, v2 ; CHECK-BE-NEXT: xxmfacc acc0 ; CHECK-BE-NEXT: stxv vs1, 16(r9) diff --git a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll --- a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll +++ b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll @@ -13,9 +13,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vmr v1, v4 ; CHECK-NEXT: vmr v4, v3 -; CHECK-NEXT: ld r3, 96(r1) ; CHECK-NEXT: vmr v0, v2 ; CHECK-NEXT: xxlor vs3, v5, v5 +; CHECK-NEXT: ld r3, 96(r1) ; CHECK-NEXT: xxlor vs0, v0, v0 ; CHECK-NEXT: xxlor vs1, v1, v1 ; CHECK-NEXT: xxlor vs2, v4, v4 @@ -37,9 +37,9 @@ ; CHECK-BE: # %bb.0: ; CHECK-BE-NEXT: vmr v1, v4 ; CHECK-BE-NEXT: vmr v4, v3 -; CHECK-BE-NEXT: ld r3, 112(r1) ; CHECK-BE-NEXT: vmr v0, v2 ; CHECK-BE-NEXT: xxlor vs3, v5, v5 +; CHECK-BE-NEXT: ld r3, 112(r1) ; CHECK-BE-NEXT: xxlor vs0, v0, v0 ; CHECK-BE-NEXT: xxlor vs1, v1, v1 ; CHECK-BE-NEXT: xxlor vs2, v4, v4 @@ -73,9 +73,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lxv v2, 0(r3) ; CHECK-NEXT: lxv v3, 0(r4) +; CHECK-NEXT: xxlor vs0, v2, v2 ; CHECK-NEXT: lxv v4, 0(r5) ; CHECK-NEXT: lxv v5, 0(r6) -; CHECK-NEXT: xxlor vs0, v2, v2 ; CHECK-NEXT: xxlor vs1, v3, v3 ; CHECK-NEXT: xxlor vs2, v4, v4 ; CHECK-NEXT: xxlor vs3, v5, v5 @@ -97,9 +97,9 @@ ; CHECK-BE: # %bb.0: ; CHECK-BE-NEXT: lxv v2, 0(r3) ; CHECK-BE-NEXT: lxv v3, 0(r4) +; CHECK-BE-NEXT: xxlor vs0, v2, v2 ; CHECK-BE-NEXT: lxv v4, 0(r5) ; CHECK-BE-NEXT: lxv v5, 0(r6) -; CHECK-BE-NEXT: xxlor vs0, v2, v2 ; CHECK-BE-NEXT: xxlor vs1, v3, v3 ; CHECK-BE-NEXT: xxlor vs2, v4, v4 ; CHECK-BE-NEXT: xxlor vs3, v5, v5 @@ -1406,8 +1406,8 @@ ; CHECK-NEXT: lxv vs0, 48(r3) ; CHECK-NEXT: lxv vs3, 0(r3) ; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: xxmtacc acc0 ; 
CHECK-NEXT: lxv v5, 0(r4) +; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v4, 16(r4) ; CHECK-NEXT: xvf64gerpp acc0, vsp36, v2 ; CHECK-NEXT: xxmfacc acc0 @@ -1423,8 +1423,8 @@ ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 16(r4) +; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v4, 0(r4) ; CHECK-BE-NEXT: xvf64gerpp acc0, vsp36, v2 ; CHECK-BE-NEXT: xxmfacc acc0 @@ -1454,8 +1454,8 @@ ; CHECK-NEXT: lxv vs0, 48(r3) ; CHECK-NEXT: lxv vs3, 0(r3) ; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v5, 0(r4) +; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v4, 16(r4) ; CHECK-NEXT: xvf64gerpn acc0, vsp36, v2 ; CHECK-NEXT: xxmfacc acc0 @@ -1471,8 +1471,8 @@ ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 16(r4) +; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v4, 0(r4) ; CHECK-BE-NEXT: xvf64gerpn acc0, vsp36, v2 ; CHECK-BE-NEXT: xxmfacc acc0 @@ -1502,8 +1502,8 @@ ; CHECK-NEXT: lxv vs0, 48(r3) ; CHECK-NEXT: lxv vs3, 0(r3) ; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v5, 0(r4) +; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v4, 16(r4) ; CHECK-NEXT: xvf64gernp acc0, vsp36, v2 ; CHECK-NEXT: xxmfacc acc0 @@ -1519,8 +1519,8 @@ ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 16(r4) +; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v4, 0(r4) ; CHECK-BE-NEXT: xvf64gernp acc0, vsp36, v2 ; CHECK-BE-NEXT: xxmfacc acc0 @@ -1550,8 +1550,8 @@ ; CHECK-NEXT: lxv vs0, 48(r3) ; CHECK-NEXT: lxv vs3, 0(r3) ; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v5, 0(r4) +; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v4, 16(r4) ; CHECK-NEXT: xvf64gernn acc0, vsp36, v2 ; CHECK-NEXT: xxmfacc acc0 @@ -1567,8 
+1567,8 @@ ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 16(r4) +; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v4, 0(r4) ; CHECK-BE-NEXT: xvf64gernn acc0, vsp36, v2 ; CHECK-BE-NEXT: xxmfacc acc0 @@ -1634,8 +1634,8 @@ ; CHECK-NEXT: lxv vs0, 48(r3) ; CHECK-NEXT: lxv vs3, 0(r3) ; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v5, 0(r4) +; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v4, 16(r4) ; CHECK-NEXT: pmxvf64gerpp acc0, vsp36, v2, 0, 0 ; CHECK-NEXT: xxmfacc acc0 @@ -1651,8 +1651,8 @@ ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 16(r4) +; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v4, 0(r4) ; CHECK-BE-NEXT: pmxvf64gerpp acc0, vsp36, v2, 0, 0 ; CHECK-BE-NEXT: xxmfacc acc0 @@ -1682,8 +1682,8 @@ ; CHECK-NEXT: lxv vs0, 48(r3) ; CHECK-NEXT: lxv vs3, 0(r3) ; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v5, 0(r4) +; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v4, 16(r4) ; CHECK-NEXT: pmxvf64gerpn acc0, vsp36, v2, 0, 0 ; CHECK-NEXT: xxmfacc acc0 @@ -1699,8 +1699,8 @@ ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 16(r4) +; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v4, 0(r4) ; CHECK-BE-NEXT: pmxvf64gerpn acc0, vsp36, v2, 0, 0 ; CHECK-BE-NEXT: xxmfacc acc0 @@ -1730,8 +1730,8 @@ ; CHECK-NEXT: lxv vs0, 48(r3) ; CHECK-NEXT: lxv vs3, 0(r3) ; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v5, 0(r4) +; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v4, 16(r4) ; CHECK-NEXT: pmxvf64gernp acc0, vsp36, v2, 0, 0 ; CHECK-NEXT: xxmfacc acc0 @@ -1747,8 +1747,8 @@ ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: 
xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 16(r4) +; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v4, 0(r4) ; CHECK-BE-NEXT: pmxvf64gernp acc0, vsp36, v2, 0, 0 ; CHECK-BE-NEXT: xxmfacc acc0 @@ -1778,8 +1778,8 @@ ; CHECK-NEXT: lxv vs0, 48(r3) ; CHECK-NEXT: lxv vs3, 0(r3) ; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v5, 0(r4) +; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v4, 16(r4) ; CHECK-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0 ; CHECK-NEXT: xxmfacc acc0 @@ -1795,8 +1795,8 @@ ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 16(r4) +; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v4, 0(r4) ; CHECK-BE-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0 ; CHECK-BE-NEXT: xxmfacc acc0 diff --git a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll --- a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll +++ b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll @@ -13,13 +13,13 @@ define void @testPHI1(<16 x i8>* %Dst, <16 x i8>* %Src, i32 signext %Len) { ; CHECK-LABEL: testPHI1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cmpwi r5, 3 ; CHECK-NEXT: xxsetaccz acc0 +; CHECK-NEXT: cmpwi r5, 3 ; CHECK-NEXT: blt cr0, .LBB0_3 ; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: clrldi r5, r5, 32 ; CHECK-NEXT: lxv v2, 0(r4) ; CHECK-NEXT: lxv v3, 16(r4) +; CHECK-NEXT: clrldi r5, r5, 32 ; CHECK-NEXT: addi r4, r4, 32 ; CHECK-NEXT: addi r5, r5, -2 ; CHECK-NEXT: mtctr r5 @@ -40,13 +40,13 @@ ; ; CHECK-BE-LABEL: testPHI1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: cmpwi r5, 3 ; CHECK-BE-NEXT: xxsetaccz acc0 +; CHECK-BE-NEXT: cmpwi r5, 3 ; CHECK-BE-NEXT: blt cr0, .LBB0_3 ; CHECK-BE-NEXT: # %bb.1: # %for.body.preheader -; CHECK-BE-NEXT: clrldi r5, r5, 32 ; CHECK-BE-NEXT: lxv v2, 0(r4) ; CHECK-BE-NEXT: lxv v3, 16(r4) +; CHECK-BE-NEXT: clrldi r5, r5, 32 ; CHECK-BE-NEXT: addi r4, r4, 32 ; CHECK-BE-NEXT: addi r5, r5, -2 ; CHECK-BE-NEXT: 
mtctr r5 @@ -110,8 +110,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv v2, 0(r4) ; CHECK-NEXT: lxv v3, 16(r4) -; CHECK-NEXT: lxv vs4, 32(r4) ; CHECK-NEXT: cmpwi r5, 4 +; CHECK-NEXT: lxv vs4, 32(r4) ; CHECK-NEXT: xvf64ger acc0, vsp34, vs4 ; CHECK-NEXT: blt cr0, .LBB1_3 ; CHECK-NEXT: # %bb.1: # %for.body.preheader @@ -138,8 +138,8 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv v2, 0(r4) ; CHECK-BE-NEXT: lxv v3, 16(r4) -; CHECK-BE-NEXT: lxv vs4, 32(r4) ; CHECK-BE-NEXT: cmpwi r5, 4 +; CHECK-BE-NEXT: lxv vs4, 32(r4) ; CHECK-BE-NEXT: xvf64ger acc0, vsp34, vs4 ; CHECK-BE-NEXT: blt cr0, .LBB1_3 ; CHECK-BE-NEXT: # %bb.1: # %for.body.preheader @@ -273,8 +273,8 @@ ; CHECK-NEXT: xvf32gernp acc0, v2, v2 ; CHECK-NEXT: bdnz .LBB3_4 ; CHECK-NEXT: .LBB3_5: # %for.cond.cleanup -; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: stxv vs0, 48(r5) ; CHECK-NEXT: stxv vs1, 32(r5) ; CHECK-NEXT: stxv vs2, 16(r5) @@ -305,8 +305,8 @@ ; CHECK-BE-NEXT: xvf32gernp acc0, v2, v2 ; CHECK-BE-NEXT: bdnz .LBB3_4 ; CHECK-BE-NEXT: .LBB3_5: # %for.cond.cleanup -; CHECK-BE-NEXT: li r3, 0 ; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: li r3, 0 ; CHECK-BE-NEXT: stxv vs1, 16(r5) ; CHECK-BE-NEXT: stxv vs0, 0(r5) ; CHECK-BE-NEXT: stxv vs3, 48(r5) diff --git a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll --- a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll +++ b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll @@ -10,215 +10,220 @@ define void @foo(i32* %.m, i32* %.n, [0 x %_elem_type_of_a]* %.a, [0 x %_elem_type_of_x]* %.x, i32* %.l, <2 x double>* %.vy01, <2 x double>* %.vy02, <2 x double>* %.vy03, <2 x double>* %.vy04, <2 x double>* %.vy05, <2 x double>* %.vy06, <2 x double>* %.vy07, <2 x double>* %.vy08, <2 x double>* %.vy09, <2 x double>* %.vy0a, <2 x double>* %.vy0b, <2 x double>* %.vy0c, <2 x double>* %.vy21, <2 x double>* %.vy22, <2 x double>* %.vy23, <2 x double>* %.vy24, <2 x double>* %.vy25, 
<2 x double>* %.vy26, <2 x double>* %.vy27, <2 x double>* %.vy28, <2 x double>* %.vy29, <2 x double>* %.vy2a, <2 x double>* %.vy2b, <2 x double>* %.vy2c) { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stdu 1, -576(1) -; CHECK-NEXT: .cfi_def_cfa_offset 576 -; CHECK-NEXT: .cfi_offset r14, -160 -; CHECK-NEXT: .cfi_offset r15, -152 -; CHECK-NEXT: .cfi_offset r16, -144 -; CHECK-NEXT: .cfi_offset r17, -136 -; CHECK-NEXT: .cfi_offset r18, -128 -; CHECK-NEXT: .cfi_offset r19, -120 -; CHECK-NEXT: .cfi_offset r20, -112 -; CHECK-NEXT: .cfi_offset r21, -104 -; CHECK-NEXT: .cfi_offset r22, -96 -; CHECK-NEXT: .cfi_offset r23, -88 -; CHECK-NEXT: .cfi_offset r24, -80 -; CHECK-NEXT: .cfi_offset r25, -72 -; CHECK-NEXT: .cfi_offset r26, -64 -; CHECK-NEXT: .cfi_offset r27, -56 -; CHECK-NEXT: .cfi_offset r28, -48 -; CHECK-NEXT: .cfi_offset r29, -40 -; CHECK-NEXT: .cfi_offset r30, -32 -; CHECK-NEXT: .cfi_offset r31, -24 +; CHECK-NEXT: stdu 1, -592(1) +; CHECK-NEXT: .cfi_def_cfa_offset 592 +; CHECK-NEXT: .cfi_offset r14, -192 +; CHECK-NEXT: .cfi_offset r15, -184 +; CHECK-NEXT: .cfi_offset r16, -176 +; CHECK-NEXT: .cfi_offset r17, -168 +; CHECK-NEXT: .cfi_offset r18, -160 +; CHECK-NEXT: .cfi_offset r19, -152 +; CHECK-NEXT: .cfi_offset r20, -144 +; CHECK-NEXT: .cfi_offset r21, -136 +; CHECK-NEXT: .cfi_offset r22, -128 +; CHECK-NEXT: .cfi_offset r23, -120 +; CHECK-NEXT: .cfi_offset r24, -112 +; CHECK-NEXT: .cfi_offset r25, -104 +; CHECK-NEXT: .cfi_offset r26, -96 +; CHECK-NEXT: .cfi_offset r27, -88 +; CHECK-NEXT: .cfi_offset r28, -80 +; CHECK-NEXT: .cfi_offset r29, -72 +; CHECK-NEXT: .cfi_offset r30, -64 +; CHECK-NEXT: .cfi_offset r31, -56 +; CHECK-NEXT: .cfi_offset f26, -48 +; CHECK-NEXT: .cfi_offset f27, -40 +; CHECK-NEXT: .cfi_offset f28, -32 +; CHECK-NEXT: .cfi_offset f29, -24 ; CHECK-NEXT: .cfi_offset f30, -16 ; CHECK-NEXT: .cfi_offset f31, -8 -; CHECK-NEXT: .cfi_offset v20, -352 -; CHECK-NEXT: .cfi_offset v21, -336 -; CHECK-NEXT: .cfi_offset v22, -320 -; CHECK-NEXT: 
.cfi_offset v23, -304 -; CHECK-NEXT: .cfi_offset v24, -288 -; CHECK-NEXT: .cfi_offset v25, -272 -; CHECK-NEXT: .cfi_offset v26, -256 -; CHECK-NEXT: .cfi_offset v27, -240 -; CHECK-NEXT: .cfi_offset v28, -224 -; CHECK-NEXT: .cfi_offset v29, -208 -; CHECK-NEXT: .cfi_offset v30, -192 -; CHECK-NEXT: .cfi_offset v31, -176 +; CHECK-NEXT: .cfi_offset v20, -384 +; CHECK-NEXT: .cfi_offset v21, -368 +; CHECK-NEXT: .cfi_offset v22, -352 +; CHECK-NEXT: .cfi_offset v23, -336 +; CHECK-NEXT: .cfi_offset v24, -320 +; CHECK-NEXT: .cfi_offset v25, -304 +; CHECK-NEXT: .cfi_offset v26, -288 +; CHECK-NEXT: .cfi_offset v27, -272 +; CHECK-NEXT: .cfi_offset v28, -256 +; CHECK-NEXT: .cfi_offset v29, -240 +; CHECK-NEXT: .cfi_offset v30, -224 +; CHECK-NEXT: .cfi_offset v31, -208 ; CHECK-NEXT: lwz 4, 0(4) -; CHECK-NEXT: std 14, 416(1) # 8-byte Folded Spill -; CHECK-NEXT: std 15, 424(1) # 8-byte Folded Spill -; CHECK-NEXT: stxv 52, 224(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 53, 240(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 54, 256(1) # 16-byte Folded Spill -; CHECK-NEXT: std 16, 432(1) # 8-byte Folded Spill -; CHECK-NEXT: std 17, 440(1) # 8-byte Folded Spill -; CHECK-NEXT: stxv 55, 272(1) # 16-byte Folded Spill -; CHECK-NEXT: std 18, 448(1) # 8-byte Folded Spill -; CHECK-NEXT: std 19, 456(1) # 8-byte Folded Spill -; CHECK-NEXT: stxv 56, 288(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 57, 304(1) # 16-byte Folded Spill -; CHECK-NEXT: std 20, 464(1) # 8-byte Folded Spill -; CHECK-NEXT: std 21, 472(1) # 8-byte Folded Spill -; CHECK-NEXT: stxv 58, 320(1) # 16-byte Folded Spill -; CHECK-NEXT: std 22, 480(1) # 8-byte Folded Spill -; CHECK-NEXT: std 23, 488(1) # 8-byte Folded Spill -; CHECK-NEXT: stxv 59, 336(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 60, 352(1) # 16-byte Folded Spill -; CHECK-NEXT: std 24, 496(1) # 8-byte Folded Spill -; CHECK-NEXT: std 25, 504(1) # 8-byte Folded Spill -; CHECK-NEXT: stxv 61, 368(1) # 16-byte Folded Spill -; CHECK-NEXT: std 26, 512(1) # 8-byte Folded 
Spill -; CHECK-NEXT: std 27, 520(1) # 8-byte Folded Spill -; CHECK-NEXT: stxv 62, 384(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 63, 400(1) # 16-byte Folded Spill -; CHECK-NEXT: std 28, 528(1) # 8-byte Folded Spill -; CHECK-NEXT: std 29, 536(1) # 8-byte Folded Spill +; CHECK-NEXT: std 14, 400(1) # 8-byte Folded Spill +; CHECK-NEXT: std 15, 408(1) # 8-byte Folded Spill ; CHECK-NEXT: cmpwi 4, 1 -; CHECK-NEXT: std 30, 544(1) # 8-byte Folded Spill -; CHECK-NEXT: std 31, 552(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 30, 560(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 31, 568(1) # 8-byte Folded Spill +; CHECK-NEXT: std 16, 416(1) # 8-byte Folded Spill +; CHECK-NEXT: std 17, 424(1) # 8-byte Folded Spill +; CHECK-NEXT: std 18, 432(1) # 8-byte Folded Spill +; CHECK-NEXT: std 19, 440(1) # 8-byte Folded Spill +; CHECK-NEXT: std 20, 448(1) # 8-byte Folded Spill +; CHECK-NEXT: std 21, 456(1) # 8-byte Folded Spill +; CHECK-NEXT: std 22, 464(1) # 8-byte Folded Spill +; CHECK-NEXT: std 23, 472(1) # 8-byte Folded Spill +; CHECK-NEXT: std 24, 480(1) # 8-byte Folded Spill +; CHECK-NEXT: std 25, 488(1) # 8-byte Folded Spill +; CHECK-NEXT: std 26, 496(1) # 8-byte Folded Spill +; CHECK-NEXT: std 27, 504(1) # 8-byte Folded Spill +; CHECK-NEXT: std 28, 512(1) # 8-byte Folded Spill +; CHECK-NEXT: std 29, 520(1) # 8-byte Folded Spill +; CHECK-NEXT: std 30, 528(1) # 8-byte Folded Spill +; CHECK-NEXT: std 31, 536(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 26, 544(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 27, 552(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 28, 560(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 29, 568(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 30, 576(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 31, 584(1) # 8-byte Folded Spill +; CHECK-NEXT: stxv 52, 208(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 53, 224(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 54, 240(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 55, 256(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 56, 
272(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 57, 288(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 58, 304(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 59, 320(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 60, 336(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 61, 352(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 62, 368(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 63, 384(1) # 16-byte Folded Spill ; CHECK-NEXT: blt 0, .LBB0_7 ; CHECK-NEXT: # %bb.1: # %_loop_1_do_.lr.ph -; CHECK-NEXT: mr 23, 5 +; CHECK-NEXT: mr 22, 5 ; CHECK-NEXT: lwz 5, 0(3) ; CHECK-NEXT: cmpwi 5, 1 ; CHECK-NEXT: blt 0, .LBB0_7 ; CHECK-NEXT: # %bb.2: # %_loop_1_do_.preheader +; CHECK-NEXT: mr 14, 6 +; CHECK-NEXT: ld 6, 712(1) +; CHECK-NEXT: lwa 3, 0(7) ; CHECK-NEXT: addi 5, 5, 1 -; CHECK-NEXT: li 20, 9 +; CHECK-NEXT: std 8, 40(1) # 8-byte Folded Spill +; CHECK-NEXT: std 9, 48(1) # 8-byte Folded Spill +; CHECK-NEXT: mr 11, 10 +; CHECK-NEXT: cmpldi 5, 9 +; CHECK-NEXT: lxv 4, 0(8) +; CHECK-NEXT: ld 8, 696(1) +; CHECK-NEXT: ld 10, 736(1) ; CHECK-NEXT: ld 28, 824(1) -; CHECK-NEXT: ld 19, 712(1) -; CHECK-NEXT: lwa 3, 0(7) -; CHECK-NEXT: ld 7, 784(1) -; CHECK-NEXT: ld 12, 776(1) -; CHECK-NEXT: ld 11, 768(1) -; CHECK-NEXT: ld 2, 760(1) +; CHECK-NEXT: std 6, 88(1) # 8-byte Folded Spill +; CHECK-NEXT: std 10, 96(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 0, 0(6) +; CHECK-NEXT: li 6, 9 +; CHECK-NEXT: ld 7, 688(1) +; CHECK-NEXT: ld 27, 840(1) ; CHECK-NEXT: ld 29, 832(1) -; CHECK-NEXT: cmpldi 5, 9 -; CHECK-NEXT: ld 27, 816(1) -; CHECK-NEXT: ld 26, 808(1) -; CHECK-NEXT: ld 25, 800(1) -; CHECK-NEXT: ld 24, 792(1) -; CHECK-NEXT: iselgt 5, 5, 20 -; CHECK-NEXT: ld 30, 752(1) -; CHECK-NEXT: ld 22, 744(1) -; CHECK-NEXT: ld 21, 736(1) -; CHECK-NEXT: ld 20, 728(1) -; CHECK-NEXT: ld 18, 704(1) -; CHECK-NEXT: ld 17, 696(1) -; CHECK-NEXT: ld 16, 688(1) -; CHECK-NEXT: ld 14, 680(1) -; CHECK-NEXT: sldi 0, 3, 2 -; CHECK-NEXT: std 5, 216(1) # 8-byte Folded Spill -; CHECK-NEXT: std 28, 208(1) # 8-byte Folded Spill -; 
CHECK-NEXT: mr 5, 4 -; CHECK-NEXT: ld 4, 720(1) -; CHECK-NEXT: std 19, 96(1) # 8-byte Folded Spill -; CHECK-NEXT: std 4, 104(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 11, 0(4) -; CHECK-NEXT: mr 4, 5 -; CHECK-NEXT: ld 5, 216(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 15, 672(1) -; CHECK-NEXT: sldi 31, 3, 1 +; CHECK-NEXT: ld 26, 816(1) +; CHECK-NEXT: ld 25, 808(1) +; CHECK-NEXT: ld 24, 800(1) +; CHECK-NEXT: ld 23, 792(1) ; CHECK-NEXT: std 8, 32(1) # 8-byte Folded Spill -; CHECK-NEXT: std 9, 40(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 43, 0(8) -; CHECK-NEXT: mr 8, 6 +; CHECK-NEXT: sldi 0, 3, 1 +; CHECK-NEXT: sldi 31, 3, 2 +; CHECK-NEXT: std 28, 184(1) # 8-byte Folded Spill +; CHECK-NEXT: std 29, 192(1) # 8-byte Folded Spill +; CHECK-NEXT: std 25, 168(1) # 8-byte Folded Spill +; CHECK-NEXT: std 26, 176(1) # 8-byte Folded Spill +; CHECK-NEXT: std 23, 152(1) # 8-byte Folded Spill +; CHECK-NEXT: std 24, 160(1) # 8-byte Folded Spill +; CHECK-NEXT: std 27, 200(1) # 8-byte Folded Spill +; CHECK-NEXT: iselgt 5, 5, 6 ; CHECK-NEXT: sldi 6, 3, 3 -; CHECK-NEXT: std 2, 144(1) # 8-byte Folded Spill -; CHECK-NEXT: std 11, 152(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 3, 0(2) -; CHECK-NEXT: lxv 2, 0(11) -; CHECK-NEXT: lxv 0, 0(7) -; CHECK-NEXT: add 6, 6, 23 -; CHECK-NEXT: lxv 7, 0(28) -; CHECK-NEXT: add 28, 3, 31 -; CHECK-NEXT: lxv 42, 0(9) -; CHECK-NEXT: lxv 41, 0(10) -; CHECK-NEXT: lxv 40, 0(15) -; CHECK-NEXT: lxv 39, 0(14) -; CHECK-NEXT: lxv 38, 0(16) -; CHECK-NEXT: lxv 33, 0(17) -; CHECK-NEXT: lxv 37, 0(18) -; CHECK-NEXT: lxv 13, 0(19) -; CHECK-NEXT: lxv 10, 0(20) -; CHECK-NEXT: lxv 8, 0(21) -; CHECK-NEXT: lxv 6, 0(22) -; CHECK-NEXT: lxv 4, 0(30) -; CHECK-NEXT: lxv 1, 0(12) -; CHECK-NEXT: lxv 32, 0(24) -; CHECK-NEXT: lxv 36, 0(25) -; CHECK-NEXT: lxv 12, 0(26) -; CHECK-NEXT: lxv 9, 0(27) -; CHECK-NEXT: lxv 5, 0(29) +; CHECK-NEXT: ld 21, 784(1) +; CHECK-NEXT: ld 20, 776(1) +; CHECK-NEXT: ld 19, 768(1) +; CHECK-NEXT: ld 18, 760(1) +; CHECK-NEXT: std 18, 120(1) # 8-byte 
Folded Spill +; CHECK-NEXT: std 19, 128(1) # 8-byte Folded Spill +; CHECK-NEXT: std 20, 136(1) # 8-byte Folded Spill +; CHECK-NEXT: std 21, 144(1) # 8-byte Folded Spill +; CHECK-NEXT: add 2, 6, 22 +; CHECK-NEXT: ld 17, 752(1) +; CHECK-NEXT: ld 16, 744(1) +; CHECK-NEXT: lxv 3, 0(9) +; CHECK-NEXT: ld 6, 728(1) ; CHECK-NEXT: addi 5, 5, -2 +; CHECK-NEXT: std 7, 80(1) # 8-byte Folded Spill +; CHECK-NEXT: std 6, 72(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 15, 720(1) +; CHECK-NEXT: ld 9, 704(1) +; CHECK-NEXT: lxv 43, 0(8) +; CHECK-NEXT: ld 8, 848(1) +; CHECK-NEXT: std 11, 56(1) # 8-byte Folded Spill +; CHECK-NEXT: std 15, 64(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 2, 0(11) ; CHECK-NEXT: sldi 11, 3, 4 -; CHECK-NEXT: std 12, 160(1) # 8-byte Folded Spill -; CHECK-NEXT: std 7, 168(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 3, 0 -; CHECK-NEXT: add 12, 11, 23 -; CHECK-NEXT: addi 11, 6, 32 -; CHECK-NEXT: addi 12, 12, 32 -; CHECK-NEXT: std 22, 128(1) # 8-byte Folded Spill -; CHECK-NEXT: std 30, 136(1) # 8-byte Folded Spill -; CHECK-NEXT: std 26, 192(1) # 8-byte Folded Spill -; CHECK-NEXT: std 27, 200(1) # 8-byte Folded Spill -; CHECK-NEXT: mulli 26, 3, 48 -; CHECK-NEXT: mulli 22, 3, 6 -; CHECK-NEXT: sldi 6, 7, 3 -; CHECK-NEXT: add 30, 23, 6 -; CHECK-NEXT: std 29, 216(1) # 8-byte Folded Spill -; CHECK-NEXT: std 24, 176(1) # 8-byte Folded Spill -; CHECK-NEXT: std 25, 184(1) # 8-byte Folded Spill -; CHECK-NEXT: li 25, 1 -; CHECK-NEXT: li 24, 0 -; CHECK-NEXT: std 10, 48(1) # 8-byte Folded Spill -; CHECK-NEXT: std 15, 56(1) # 8-byte Folded Spill -; CHECK-NEXT: std 14, 64(1) # 8-byte Folded Spill -; CHECK-NEXT: std 16, 72(1) # 8-byte Folded Spill -; CHECK-NEXT: std 17, 80(1) # 8-byte Folded Spill -; CHECK-NEXT: std 18, 88(1) # 8-byte Folded Spill -; CHECK-NEXT: std 20, 112(1) # 8-byte Folded Spill -; CHECK-NEXT: std 21, 120(1) # 8-byte Folded Spill ; CHECK-NEXT: rldicl 5, 5, 61, 3 +; CHECK-NEXT: lxv 1, 0(7) +; CHECK-NEXT: add 7, 3, 31 +; CHECK-NEXT: add 12, 11, 22 +; 
CHECK-NEXT: addi 11, 2, 32 ; CHECK-NEXT: addi 2, 5, 1 +; CHECK-NEXT: lxv 6, 0(28) ; CHECK-NEXT: sldi 5, 3, 5 -; CHECK-NEXT: add 29, 23, 5 +; CHECK-NEXT: add 28, 3, 0 +; CHECK-NEXT: lxv 42, 0(9) +; CHECK-NEXT: lxv 41, 0(15) +; CHECK-NEXT: lxv 40, 0(6) +; CHECK-NEXT: lxv 39, 0(10) +; CHECK-NEXT: lxv 38, 0(16) +; CHECK-NEXT: sldi 30, 7, 3 +; CHECK-NEXT: addi 12, 12, 32 +; CHECK-NEXT: add 30, 22, 30 +; CHECK-NEXT: std 16, 104(1) # 8-byte Folded Spill +; CHECK-NEXT: std 17, 112(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 33, 0(17) +; CHECK-NEXT: lxv 32, 0(18) +; CHECK-NEXT: lxv 37, 0(19) +; CHECK-NEXT: lxv 36, 0(20) +; CHECK-NEXT: lxv 13, 0(21) +; CHECK-NEXT: lxv 12, 0(23) +; CHECK-NEXT: li 23, 0 +; CHECK-NEXT: lxv 11, 0(24) +; CHECK-NEXT: li 24, 1 +; CHECK-NEXT: lxv 9, 0(25) +; CHECK-NEXT: mulli 25, 3, 6 +; CHECK-NEXT: lxv 8, 0(26) +; CHECK-NEXT: mulli 26, 3, 48 +; CHECK-NEXT: lxv 5, 0(29) +; CHECK-NEXT: add 29, 22, 5 ; CHECK-NEXT: sldi 5, 28, 3 -; CHECK-NEXT: add 27, 23, 5 -; CHECK-NEXT: mr 5, 23 +; CHECK-NEXT: lxv 7, 0(27) +; CHECK-NEXT: add 27, 22, 5 +; CHECK-NEXT: mr 5, 22 +; CHECK-NEXT: lxv 10, 0(8) ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_3: # %_loop_2_do_.lr.ph ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB0_4 Depth 2 -; CHECK-NEXT: maddld 6, 22, 24, 7 -; CHECK-NEXT: maddld 20, 22, 24, 0 +; CHECK-NEXT: maddld 6, 25, 23, 7 ; CHECK-NEXT: mtctr 2 ; CHECK-NEXT: sldi 6, 6, 3 -; CHECK-NEXT: add 21, 23, 6 -; CHECK-NEXT: sldi 6, 20, 3 -; CHECK-NEXT: add 20, 23, 6 -; CHECK-NEXT: maddld 6, 22, 24, 28 +; CHECK-NEXT: add 21, 22, 6 +; CHECK-NEXT: maddld 6, 25, 23, 31 +; CHECK-NEXT: sldi 6, 6, 3 +; CHECK-NEXT: add 20, 22, 6 +; CHECK-NEXT: maddld 6, 25, 23, 28 ; CHECK-NEXT: sldi 6, 6, 3 -; CHECK-NEXT: add 19, 23, 6 -; CHECK-NEXT: maddld 6, 22, 24, 31 +; CHECK-NEXT: add 19, 22, 6 +; CHECK-NEXT: maddld 6, 25, 23, 0 ; CHECK-NEXT: sldi 6, 6, 3 -; CHECK-NEXT: add 18, 23, 6 -; CHECK-NEXT: maddld 6, 22, 24, 3 +; CHECK-NEXT: add 18, 22, 6 +; CHECK-NEXT: 
maddld 6, 25, 23, 3 ; CHECK-NEXT: sldi 6, 6, 3 -; CHECK-NEXT: add 17, 23, 6 -; CHECK-NEXT: mulld 6, 22, 24 +; CHECK-NEXT: add 17, 22, 6 +; CHECK-NEXT: mulld 6, 25, 23 ; CHECK-NEXT: sldi 6, 6, 3 -; CHECK-NEXT: add 16, 23, 6 -; CHECK-NEXT: mr 6, 8 +; CHECK-NEXT: add 16, 22, 6 +; CHECK-NEXT: mr 6, 14 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_4: # %_loop_2_do_ ; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: lxvp 34, 0(6) ; CHECK-NEXT: lxvp 44, 0(16) +; CHECK-NEXT: xvmaddadp 4, 45, 35 ; CHECK-NEXT: lxvp 46, 0(17) +; CHECK-NEXT: xvmaddadp 3, 47, 35 ; CHECK-NEXT: lxvp 48, 0(18) ; CHECK-NEXT: lxvp 50, 0(19) ; CHECK-NEXT: lxvp 62, 0(20) @@ -228,135 +233,135 @@ ; CHECK-NEXT: lxvp 54, 32(17) ; CHECK-NEXT: lxvp 52, 32(18) ; CHECK-NEXT: lxvp 30, 32(19) +; CHECK-NEXT: lxvp 28, 32(20) +; CHECK-NEXT: lxvp 26, 32(21) +; CHECK-NEXT: xvmaddadp 2, 49, 35 +; CHECK-NEXT: xvmaddadp 1, 51, 35 +; CHECK-NEXT: xvmaddadp 43, 63, 35 +; CHECK-NEXT: xvmaddadp 42, 61, 35 +; CHECK-NEXT: xvmaddadp 0, 44, 34 +; CHECK-NEXT: xvmaddadp 41, 46, 34 +; CHECK-NEXT: xvmaddadp 40, 48, 34 +; CHECK-NEXT: xvmaddadp 39, 50, 34 +; CHECK-NEXT: xvmaddadp 38, 62, 34 +; CHECK-NEXT: xvmaddadp 33, 60, 34 +; CHECK-NEXT: xvmaddadp 32, 57, 59 +; CHECK-NEXT: xvmaddadp 37, 55, 59 +; CHECK-NEXT: xvmaddadp 36, 53, 59 +; CHECK-NEXT: xvmaddadp 13, 31, 59 +; CHECK-NEXT: xvmaddadp 12, 29, 59 +; CHECK-NEXT: xvmaddadp 11, 27, 59 +; CHECK-NEXT: xvmaddadp 9, 56, 58 +; CHECK-NEXT: xvmaddadp 8, 54, 58 +; CHECK-NEXT: xvmaddadp 6, 52, 58 +; CHECK-NEXT: xvmaddadp 5, 30, 58 +; CHECK-NEXT: xvmaddadp 7, 28, 58 +; CHECK-NEXT: xvmaddadp 10, 26, 58 ; CHECK-NEXT: addi 6, 6, 64 ; CHECK-NEXT: addi 16, 16, 64 ; CHECK-NEXT: addi 17, 17, 64 ; CHECK-NEXT: addi 18, 18, 64 ; CHECK-NEXT: addi 19, 19, 64 -; CHECK-NEXT: xvmaddadp 43, 45, 35 -; CHECK-NEXT: xvmaddadp 42, 47, 35 -; CHECK-NEXT: xvmaddadp 41, 49, 35 -; CHECK-NEXT: xvmaddadp 40, 51, 35 -; CHECK-NEXT: xvmaddadp 39, 63, 35 -; 
CHECK-NEXT: xvmaddadp 38, 61, 35 -; CHECK-NEXT: xvmaddadp 33, 44, 34 -; CHECK-NEXT: xvmaddadp 37, 46, 34 -; CHECK-NEXT: xvmaddadp 13, 48, 34 -; CHECK-NEXT: xvmaddadp 11, 50, 34 -; CHECK-NEXT: xvmaddadp 10, 62, 34 -; CHECK-NEXT: xvmaddadp 8, 60, 34 -; CHECK-NEXT: lxvp 34, 32(20) -; CHECK-NEXT: lxvp 44, 32(21) ; CHECK-NEXT: addi 20, 20, 64 ; CHECK-NEXT: addi 21, 21, 64 -; CHECK-NEXT: xvmaddadp 6, 57, 59 -; CHECK-NEXT: xvmaddadp 4, 55, 59 -; CHECK-NEXT: xvmaddadp 3, 53, 59 -; CHECK-NEXT: xvmaddadp 2, 31, 59 -; CHECK-NEXT: xvmaddadp 32, 56, 58 -; CHECK-NEXT: xvmaddadp 36, 54, 58 -; CHECK-NEXT: xvmaddadp 12, 52, 58 -; CHECK-NEXT: xvmaddadp 9, 30, 58 -; CHECK-NEXT: xvmaddadp 1, 35, 59 -; CHECK-NEXT: xvmaddadp 0, 45, 59 -; CHECK-NEXT: xvmaddadp 7, 34, 58 -; CHECK-NEXT: xvmaddadp 5, 44, 58 ; CHECK-NEXT: bdnz .LBB0_4 ; CHECK-NEXT: # %bb.5: # %_loop_2_endl_ ; CHECK-NEXT: # -; CHECK-NEXT: addi 25, 25, 6 +; CHECK-NEXT: addi 24, 24, 6 ; CHECK-NEXT: add 5, 5, 26 ; CHECK-NEXT: add 11, 11, 26 ; CHECK-NEXT: add 30, 30, 26 ; CHECK-NEXT: add 12, 12, 26 ; CHECK-NEXT: add 29, 29, 26 ; CHECK-NEXT: add 27, 27, 26 -; CHECK-NEXT: addi 24, 24, 1 -; CHECK-NEXT: cmpld 25, 4 +; CHECK-NEXT: addi 23, 23, 1 +; CHECK-NEXT: cmpld 24, 4 ; CHECK-NEXT: ble 0, .LBB0_3 ; CHECK-NEXT: # %bb.6: # %_loop_1_loopHeader_._return_bb_crit_edge.loopexit -; CHECK-NEXT: ld 3, 32(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 43, 0(3) ; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 42, 0(3) +; CHECK-NEXT: stxv 4, 0(3) ; CHECK-NEXT: ld 3, 48(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 41, 0(3) +; CHECK-NEXT: stxv 3, 0(3) ; CHECK-NEXT: ld 3, 56(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 40, 0(3) -; CHECK-NEXT: ld 3, 64(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 39, 0(3) -; CHECK-NEXT: ld 3, 72(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 38, 0(3) +; CHECK-NEXT: stxv 2, 0(3) ; CHECK-NEXT: ld 3, 80(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 33, 0(3) +; CHECK-NEXT: stxv 1, 0(3) +; 
CHECK-NEXT: ld 3, 32(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 43, 0(3) ; CHECK-NEXT: ld 3, 88(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 37, 0(3) +; CHECK-NEXT: stxv 42, 0(9) +; CHECK-NEXT: stxv 0, 0(3) +; CHECK-NEXT: ld 3, 64(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 41, 0(3) +; CHECK-NEXT: ld 3, 72(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 40, 0(3) ; CHECK-NEXT: ld 3, 96(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 13, 0(3) +; CHECK-NEXT: stxv 39, 0(3) ; CHECK-NEXT: ld 3, 104(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 11, 0(3) +; CHECK-NEXT: stxv 38, 0(3) ; CHECK-NEXT: ld 3, 112(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 10, 0(3) +; CHECK-NEXT: stxv 33, 0(3) ; CHECK-NEXT: ld 3, 120(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 8, 0(3) +; CHECK-NEXT: stxv 32, 0(3) ; CHECK-NEXT: ld 3, 128(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 6, 0(3) +; CHECK-NEXT: stxv 37, 0(3) ; CHECK-NEXT: ld 3, 136(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 4, 0(3) +; CHECK-NEXT: stxv 36, 0(3) ; CHECK-NEXT: ld 3, 144(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 3, 0(3) +; CHECK-NEXT: stxv 13, 0(3) ; CHECK-NEXT: ld 3, 152(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 2, 0(3) +; CHECK-NEXT: stxv 12, 0(3) ; CHECK-NEXT: ld 3, 160(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 1, 0(3) +; CHECK-NEXT: stxv 11, 0(3) ; CHECK-NEXT: ld 3, 168(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 0, 0(3) +; CHECK-NEXT: stxv 9, 0(3) ; CHECK-NEXT: ld 3, 176(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 32, 0(3) +; CHECK-NEXT: stxv 8, 0(3) ; CHECK-NEXT: ld 3, 184(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 36, 0(3) +; CHECK-NEXT: stxv 6, 0(3) ; CHECK-NEXT: ld 3, 192(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 12, 0(3) +; CHECK-NEXT: stxv 5, 0(3) ; CHECK-NEXT: ld 3, 200(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 9, 0(3) -; CHECK-NEXT: ld 3, 208(1) # 8-byte Folded Reload ; CHECK-NEXT: stxv 7, 0(3) -; CHECK-NEXT: ld 3, 216(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 5, 0(3) +; 
CHECK-NEXT: stxv 10, 0(8) ; CHECK-NEXT: .LBB0_7: # %_return_bb -; CHECK-NEXT: lxv 63, 400(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 62, 384(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 61, 368(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 60, 352(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 59, 336(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 58, 320(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 57, 304(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 56, 288(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 55, 272(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 54, 256(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 53, 240(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 52, 224(1) # 16-byte Folded Reload -; CHECK-NEXT: lfd 31, 568(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 30, 560(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 31, 552(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 30, 544(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 29, 536(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 28, 528(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 27, 520(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 26, 512(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 25, 504(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 24, 496(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 23, 488(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 22, 480(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 21, 472(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 20, 464(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 19, 456(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 18, 448(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 17, 440(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 16, 432(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 15, 424(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 14, 416(1) # 8-byte Folded Reload -; CHECK-NEXT: addi 1, 1, 576 +; CHECK-NEXT: lxv 63, 384(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 62, 368(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 61, 352(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 60, 336(1) # 16-byte Folded Reload 
+; CHECK-NEXT: lxv 59, 320(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 58, 304(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 57, 288(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 56, 272(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 55, 256(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 54, 240(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 53, 224(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 52, 208(1) # 16-byte Folded Reload +; CHECK-NEXT: lfd 31, 584(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 30, 576(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 29, 568(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 28, 560(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 27, 552(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 26, 544(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 31, 536(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 30, 528(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 29, 520(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 28, 512(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 27, 504(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 26, 496(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 25, 488(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 24, 480(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 23, 472(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 22, 464(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 21, 456(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 20, 448(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 19, 440(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 18, 432(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 17, 424(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 16, 416(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 15, 408(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 14, 400(1) # 8-byte Folded Reload +; CHECK-NEXT: addi 1, 1, 592 ; CHECK-NEXT: blr entry: %_val_l_ = load i32, i32* %.l, align 4 diff --git a/llvm/test/CodeGen/PowerPC/p10-fi-elim.ll b/llvm/test/CodeGen/PowerPC/p10-fi-elim.ll --- a/llvm/test/CodeGen/PowerPC/p10-fi-elim.ll +++ b/llvm/test/CodeGen/PowerPC/p10-fi-elim.ll @@ -26,34 +26,34 @@ ; 
CHECK-NEXT: stdu r1, -80(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 80 ; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: lxv v2, 0(r3) ; CHECK-NEXT: mr r9, r6 ; CHECK-NEXT: mr r6, r5 -; CHECK-NEXT: li r5, 3 -; CHECK-NEXT: li r10, -127 -; CHECK-NEXT: lxv v2, 0(r3) -; CHECK-NEXT: stb r5, 0(0) -; CHECK-NEXT: stb r10, 0(r3) -; CHECK-NEXT: stb r5, 0(r3) -; CHECK-NEXT: lbz r5, 2(r7) -; CHECK-NEXT: li r2, 1 -; CHECK-NEXT: stb r10, 0(r3) -; CHECK-NEXT: pstxv v2, 64(r1), 0 -; CHECK-NEXT: vaddudm v3, v2, v2 -; CHECK-NEXT: mfvsrd r11, v2 ; CHECK-NEXT: li r0, 4 +; CHECK-NEXT: li r11, 3 +; CHECK-NEXT: std r0, 0(r3) +; CHECK-NEXT: stb r11, 0(0) +; CHECK-NEXT: li r12, -127 +; CHECK-NEXT: stb r12, 0(r3) +; CHECK-NEXT: li r2, 1 +; CHECK-NEXT: stb r11, 0(r3) +; CHECK-NEXT: stb r12, 0(r3) ; CHECK-NEXT: stw r2, 0(r3) +; CHECK-NEXT: mfvsrd r5, v2 +; CHECK-NEXT: vaddudm v3, v2, v2 +; CHECK-NEXT: pstxv v2, 64(r1), 0 +; CHECK-NEXT: neg r5, r5 +; CHECK-NEXT: mfvsrd r10, v3 +; CHECK-NEXT: std r5, 0(r3) +; CHECK-NEXT: lbz r5, 2(r7) ; CHECK-NEXT: mr r7, r9 +; CHECK-NEXT: neg r10, r10 +; CHECK-NEXT: std r2, 0(r3) ; CHECK-NEXT: std r0, 0(r3) +; CHECK-NEXT: std r10, 0(r3) ; CHECK-NEXT: rlwinm r5, r5, 0, 27, 27 -; CHECK-NEXT: mfvsrd r12, v3 -; CHECK-NEXT: neg r11, r11 ; CHECK-NEXT: stb r5, 0(0) ; CHECK-NEXT: lbz r5, 2(r8) -; CHECK-NEXT: neg r12, r12 -; CHECK-NEXT: std r11, 0(r3) -; CHECK-NEXT: std r2, 0(r3) -; CHECK-NEXT: std r0, 0(r3) -; CHECK-NEXT: std r12, 0(r3) ; CHECK-NEXT: rlwinm r5, r5, 0, 27, 27 ; CHECK-NEXT: stb r5, 0(r3) ; CHECK-NEXT: li r5, 2 @@ -74,36 +74,36 @@ ; CHECK-BE-NEXT: .cfi_def_cfa_offset 176 ; CHECK-BE-NEXT: .cfi_offset lr, 16 ; CHECK-BE-NEXT: .cfi_offset r30, -16 +; CHECK-BE-NEXT: lxv v2, 0(r3) ; CHECK-BE-NEXT: mr r9, r6 ; CHECK-BE-NEXT: mr r6, r5 -; CHECK-BE-NEXT: li r5, 3 -; CHECK-BE-NEXT: li r11, -127 -; CHECK-BE-NEXT: lxv v2, 0(r3) +; CHECK-BE-NEXT: li r0, 4 +; CHECK-BE-NEXT: li r11, 3 ; CHECK-BE-NEXT: std r30, 160(r1) # 8-byte Folded Spill -; CHECK-BE-NEXT: pstxv v2, 144(r1), 
0 -; CHECK-BE-NEXT: stb r5, 0(0) -; CHECK-BE-NEXT: stb r11, 0(r3) -; CHECK-BE-NEXT: stb r5, 0(r3) -; CHECK-BE-NEXT: lbz r5, 2(r7) -; CHECK-BE-NEXT: vaddudm v3, v2, v2 -; CHECK-BE-NEXT: mfvsrld r10, v2 +; CHECK-BE-NEXT: std r0, 0(r3) +; CHECK-BE-NEXT: stb r11, 0(0) +; CHECK-BE-NEXT: li r12, -127 +; CHECK-BE-NEXT: stb r12, 0(r3) ; CHECK-BE-NEXT: li r30, 1 ; CHECK-BE-NEXT: stb r11, 0(r3) -; CHECK-BE-NEXT: li r0, 4 +; CHECK-BE-NEXT: stb r12, 0(r3) +; CHECK-BE-NEXT: mfvsrld r5, v2 +; CHECK-BE-NEXT: vaddudm v3, v2, v2 ; CHECK-BE-NEXT: stw r30, 0(r3) +; CHECK-BE-NEXT: pstxv v2, 144(r1), 0 +; CHECK-BE-NEXT: mfvsrld r10, v3 +; CHECK-BE-NEXT: neg r5, r5 +; CHECK-BE-NEXT: std r5, 0(r3) +; CHECK-BE-NEXT: lbz r5, 2(r7) ; CHECK-BE-NEXT: mr r7, r9 +; CHECK-BE-NEXT: neg r10, r10 +; CHECK-BE-NEXT: std r30, 0(r3) ; CHECK-BE-NEXT: std r0, 0(r3) +; CHECK-BE-NEXT: std r10, 0(r3) ; CHECK-BE-NEXT: rlwinm r5, r5, 0, 27, 27 -; CHECK-BE-NEXT: mfvsrld r12, v3 ; CHECK-BE-NEXT: stb r5, 0(0) ; CHECK-BE-NEXT: lbz r5, 2(r8) -; CHECK-BE-NEXT: neg r10, r10 -; CHECK-BE-NEXT: neg r12, r12 -; CHECK-BE-NEXT: std r10, 0(r3) -; CHECK-BE-NEXT: std r30, 0(r3) -; CHECK-BE-NEXT: std r0, 0(r3) ; CHECK-BE-NEXT: rlwinm r5, r5, 0, 27, 27 -; CHECK-BE-NEXT: std r12, 0(r3) ; CHECK-BE-NEXT: stb r5, 0(r3) ; CHECK-BE-NEXT: li r5, 2 ; CHECK-BE-NEXT: stw r5, 0(r3) diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll b/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll --- a/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll +++ b/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll @@ -82,17 +82,17 @@ ; CHECK-NEXT: .LBB0_12: # %bb40 ; CHECK-NEXT: mcrf cr6, cr4 ; CHECK-NEXT: crnot 4*cr4+eq, 4*cr4+eq -; CHECK-NEXT: crand 4*cr4+lt, 4*cr7+lt, 4*cr2+un +; CHECK-NEXT: crand 4*cr4+gt, 4*cr7+lt, 4*cr2+un +; CHECK-NEXT: crand 4*cr4+lt, 4*cr1+lt, 4*cr5+lt ; CHECK-NEXT: # implicit-def: $x6 -; CHECK-NEXT: crand 4*cr4+gt, 4*cr1+lt, 4*cr5+lt -; CHECK-NEXT: bc 4, 4*cr4+gt, .LBB0_14 +; CHECK-NEXT: bc 4, 4*cr4+lt, .LBB0_14 ; CHECK-NEXT: # %bb.13: # %bb48 
; CHECK-NEXT: ld r6, 0(r3) ; CHECK-NEXT: .LBB0_14: # %bb50 ; CHECK-NEXT: cmpwi r5, -1 ; CHECK-NEXT: crand 4*cr4+un, 4*cr3+lt, 4*cr4+eq ; CHECK-NEXT: # implicit-def: $r5 -; CHECK-NEXT: bc 4, 4*cr4+lt, .LBB0_16 +; CHECK-NEXT: bc 4, 4*cr4+gt, .LBB0_16 ; CHECK-NEXT: # %bb.15: # %bb52 ; CHECK-NEXT: lwz r5, 0(r3) ; CHECK-NEXT: .LBB0_16: # %bb54 @@ -111,18 +111,18 @@ ; CHECK-NEXT: cmpwi cr1, r5, 1 ; CHECK-NEXT: crand lt, gt, 4*cr4+eq ; CHECK-NEXT: # implicit-def: $x5 +; CHECK-NEXT: crand 4*cr4+eq, 4*cr3+eq, 4*cr4+eq ; CHECK-NEXT: setnbc r8, 4*cr5+gt ; CHECK-NEXT: crand 4*cr5+lt, 4*cr2+eq, 4*cr5+lt -; CHECK-NEXT: crand 4*cr4+eq, 4*cr3+eq, 4*cr4+eq -; CHECK-NEXT: crand gt, 4*cr1+lt, 4*cr4+lt +; CHECK-NEXT: crand gt, 4*cr1+lt, 4*cr4+gt ; CHECK-NEXT: stw r8, -24(r1) ; CHECK-NEXT: setnbc r8, 4*cr5+lt ; CHECK-NEXT: cmpwi cr5, r7, 1 ; CHECK-NEXT: stw r8, -28(r1) -; CHECK-NEXT: lwz r6, 92(r6) ; CHECK-NEXT: crand eq, 4*cr5+lt, 4*cr4+un +; CHECK-NEXT: lwz r6, 92(r6) ; CHECK-NEXT: cmpwi cr6, r6, 1 -; CHECK-NEXT: crand un, 4*cr6+lt, 4*cr4+gt +; CHECK-NEXT: crand un, 4*cr6+lt, 4*cr4+lt ; CHECK-NEXT: bc 4, gt, .LBB0_20 ; CHECK-NEXT: # %bb.19: # %bb68 ; CHECK-NEXT: ld r5, 0(r3) @@ -134,8 +134,8 @@ ; CHECK-NEXT: rlwimi r6, r7, 12, 20, 20 ; CHECK-NEXT: mtocrf 4, r6 ; CHECK-NEXT: ld r6, 0(r3) -; CHECK-NEXT: crandc 4*cr5+gt, lt, 4*cr3+eq ; CHECK-NEXT: lwz r8, -16(r1) +; CHECK-NEXT: crandc 4*cr5+gt, lt, 4*cr3+eq ; CHECK-NEXT: # implicit-def: $cr5eq ; CHECK-NEXT: crandc 4*cr5+lt, 4*cr5+lt, 4*cr7+eq ; CHECK-NEXT: mfocrf r7, 4 @@ -156,32 +156,32 @@ ; CHECK-NEXT: setbc r5, 4*cr5+un ; CHECK-NEXT: # implicit-def: $cr5un ; CHECK-NEXT: mfocrf r8, 4 -; CHECK-NEXT: add r5, r7, r5 ; CHECK-NEXT: rlwimi r8, r9, 9, 23, 23 ; CHECK-NEXT: lwz r9, -4(r1) +; CHECK-NEXT: add r5, r7, r5 ; CHECK-NEXT: mtocrf 4, r8 -; CHECK-NEXT: mtocrf 128, r9 -; CHECK-NEXT: lwz r9, -8(r1) ; CHECK-NEXT: isel r3, 0, r3, 4*cr5+lt ; CHECK-NEXT: setbc r8, 4*cr5+un ; CHECK-NEXT: isel r6, 0, r6, 4*cr5+gt ; CHECK-NEXT: isel r4, 0, r4, 
4*cr5+eq +; CHECK-NEXT: mtocrf 128, r9 +; CHECK-NEXT: lwz r9, -8(r1) ; CHECK-NEXT: add r5, r8, r5 ; CHECK-NEXT: iseleq r3, 0, r3 ; CHECK-NEXT: mtfprd f0, r5 +; CHECK-NEXT: xscvsxddp f0, f0 ; CHECK-NEXT: mtocrf 128, r9 ; CHECK-NEXT: lwz r9, -12(r1) ; CHECK-NEXT: lwz r12, 8(r1) -; CHECK-NEXT: xscvsxddp f0, f0 ; CHECK-NEXT: iseleq r6, 0, r6 -; CHECK-NEXT: mtocrf 128, r9 ; CHECK-NEXT: add r3, r6, r3 +; CHECK-NEXT: xsmuldp f0, f0, f2 +; CHECK-NEXT: mtocrf 128, r9 ; CHECK-NEXT: mtocrf 32, r12 ; CHECK-NEXT: mtocrf 16, r12 ; CHECK-NEXT: mtocrf 8, r12 ; CHECK-NEXT: iseleq r4, 0, r4 ; CHECK-NEXT: add r3, r4, r3 -; CHECK-NEXT: xsmuldp f0, f0, f2 ; CHECK-NEXT: mtfprd f1, r3 ; CHECK-NEXT: xscvsxddp f1, f1 ; CHECK-NEXT: xsadddp f1, f0, f1 diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll --- a/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll +++ b/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll @@ -17,10 +17,9 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr { ; CHECK-LABEL: P10_Spill_CR_GT: -; CHECK: .localentry P10_Spill_CR_GT, 1 -; CHECK-NEXT: # %bb.0: # %bb -; CHECK-NEXT: mflr r0 +; CHECK: # %bb.0: # %bb ; CHECK-NEXT: mfcr r12 +; CHECK-NEXT: mflr r0 ; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stw r12, 8(r1) ; CHECK-NEXT: stdu r1, -64(r1) @@ -49,8 +48,8 @@ ; CHECK-NEXT: .LBB0_1: # %bb43 ; CHECK-NEXT: # ; CHECK-NEXT: bl call_1@notoc -; CHECK-NEXT: li r4, 0 ; CHECK-NEXT: setnbc r3, 4*cr4+eq +; CHECK-NEXT: li r4, 0 ; CHECK-NEXT: stb r4, 0(r3) ; CHECK-NEXT: li r4, 0 ; CHECK-NEXT: .p2align 4 @@ -210,8 +209,8 @@ ; ; CHECK-BE-LABEL: P10_Spill_CR_GT: ; CHECK-BE: # %bb.0: # %bb -; CHECK-BE-NEXT: mflr r0 ; CHECK-BE-NEXT: mfcr r12 +; CHECK-BE-NEXT: mflr r0 ; CHECK-BE-NEXT: std r0, 16(r1) ; CHECK-BE-NEXT: stw r12, 8(r1) ; CHECK-BE-NEXT: stdu r1, -144(r1) @@ -242,8 +241,8 @@ ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: bl call_1 ; CHECK-BE-NEXT: nop -; CHECK-BE-NEXT: li r4, 0 ; CHECK-BE-NEXT: setnbc r3, 4*cr4+eq +; CHECK-BE-NEXT: li r4, 0 ; 
CHECK-BE-NEXT: stb r4, 0(r3) ; CHECK-BE-NEXT: li r4, 0 ; CHECK-BE-NEXT: .p2align 4 diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll --- a/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll +++ b/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll @@ -25,10 +25,9 @@ define dso_local void @P10_Spill_CR_LT() local_unnamed_addr { ; CHECK-LABEL: P10_Spill_CR_LT: -; CHECK: .localentry P10_Spill_CR_LT, 1 -; CHECK-NEXT: # %bb.0: # %bb -; CHECK-NEXT: mflr r0 +; CHECK: # %bb.0: # %bb ; CHECK-NEXT: mfcr r12 +; CHECK-NEXT: mflr r0 ; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stw r12, 8(r1) ; CHECK-NEXT: stdu r1, -80(r1) @@ -90,8 +89,8 @@ ; ; CHECK-BE-LABEL: P10_Spill_CR_LT: ; CHECK-BE: # %bb.0: # %bb -; CHECK-BE-NEXT: mflr r0 ; CHECK-BE-NEXT: mfcr r12 +; CHECK-BE-NEXT: mflr r0 ; CHECK-BE-NEXT: std r0, 16(r1) ; CHECK-BE-NEXT: stw r12, 8(r1) ; CHECK-BE-NEXT: stdu r1, -160(r1) diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll --- a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll +++ b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll @@ -36,8 +36,8 @@ define dso_local void @P10_Spill_CR_UN(%2* %arg, %1* %arg1, i32 %arg2) local_unnamed_addr { ; CHECK-LABEL: P10_Spill_CR_UN: ; CHECK: # %bb.0: # %bb -; CHECK-NEXT: mflr r0 ; CHECK-NEXT: mfcr r12 +; CHECK-NEXT: mflr r0 ; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stw r12, 8(r1) ; CHECK-NEXT: stdu r1, -224(r1) @@ -84,8 +84,8 @@ ; CHECK-NEXT: # %bb.4: # %bb37 ; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_14 ; CHECK-NEXT: .LBB0_5: # %bb42 -; CHECK-NEXT: li r4, 0 ; CHECK-NEXT: paddi r3, 0, global_1@PCREL, 1 +; CHECK-NEXT: li r4, 0 ; CHECK-NEXT: cmpwi r28, 0 ; CHECK-NEXT: isel r3, r3, r4, 4*cr2+gt ; CHECK-NEXT: crnot 4*cr2+lt, eq @@ -145,19 +145,19 @@ ; CHECK-NEXT: # implicit-def: $r3 ; CHECK-NEXT: .LBB0_15: # %bb50 ; CHECK-NEXT: li r4, 0 +; CHECK-NEXT: xxspltidp vs3, -1082130432 +; CHECK-NEXT: xxspltidp vs4, -1082130432 ; CHECK-NEXT: extsh r9, r3 ; CHECK-NEXT: extsw r6, 
r28 ; CHECK-NEXT: li r5, 0 -; CHECK-NEXT: xxspltidp vs3, -1082130432 -; CHECK-NEXT: xxspltidp vs4, -1082130432 +; CHECK-NEXT: li r7, 0 ; CHECK-NEXT: std r30, 104(r1) ; CHECK-NEXT: std r29, 96(r1) -; CHECK-NEXT: li r7, 0 ; CHECK-NEXT: li r8, 0 ; CHECK-NEXT: li r10, 0 +; CHECK-NEXT: xxlxor f1, f1, f1 ; CHECK-NEXT: std r4, 152(r1) ; CHECK-NEXT: li r4, -1 -; CHECK-NEXT: xxlxor f1, f1, f1 ; CHECK-NEXT: std r4, 112(r1) ; CHECK-NEXT: li r4, 1024 ; CHECK-NEXT: bl call_4@notoc @@ -182,8 +182,8 @@ ; ; CHECK-BE-LABEL: P10_Spill_CR_UN: ; CHECK-BE: # %bb.0: # %bb -; CHECK-BE-NEXT: mflr r0 ; CHECK-BE-NEXT: mfcr r12 +; CHECK-BE-NEXT: mflr r0 ; CHECK-BE-NEXT: std r0, 16(r1) ; CHECK-BE-NEXT: stw r12, 8(r1) ; CHECK-BE-NEXT: stdu r1, -240(r1) @@ -303,19 +303,19 @@ ; CHECK-BE-NEXT: # implicit-def: $r3 ; CHECK-BE-NEXT: .LBB0_15: # %bb50 ; CHECK-BE-NEXT: li r4, 0 +; CHECK-BE-NEXT: xxspltidp vs3, -1082130432 +; CHECK-BE-NEXT: xxspltidp vs4, -1082130432 ; CHECK-BE-NEXT: extsh r9, r3 ; CHECK-BE-NEXT: extsw r6, r28 ; CHECK-BE-NEXT: li r5, 0 -; CHECK-BE-NEXT: xxspltidp vs3, -1082130432 -; CHECK-BE-NEXT: xxspltidp vs4, -1082130432 +; CHECK-BE-NEXT: li r7, 0 ; CHECK-BE-NEXT: std r30, 120(r1) ; CHECK-BE-NEXT: std r29, 112(r1) -; CHECK-BE-NEXT: li r7, 0 ; CHECK-BE-NEXT: li r8, 0 ; CHECK-BE-NEXT: li r10, 0 +; CHECK-BE-NEXT: xxlxor f1, f1, f1 ; CHECK-BE-NEXT: std r4, 168(r1) ; CHECK-BE-NEXT: li r4, -1 -; CHECK-BE-NEXT: xxlxor f1, f1, f1 ; CHECK-BE-NEXT: std r4, 128(r1) ; CHECK-BE-NEXT: li r4, 1024 ; CHECK-BE-NEXT: bl call_4 diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll --- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll @@ -109,10 +109,10 @@ ; CHECK-S-NEXT: sub r29, r8, r9 ; CHECK-S-NEXT: add r9, r10, r9 ; CHECK-S-NEXT: sub r10, r10, r3 +; CHECK-S-NEXT: mullw r3, r4, r3 ; CHECK-S-NEXT: sub r12, r4, r5 ; CHECK-S-NEXT: add r0, r6, r5 ; CHECK-S-NEXT: sub r2, r6, 
r7 -; CHECK-S-NEXT: mullw r3, r4, r3 ; CHECK-S-NEXT: add r30, r8, r7 ; CHECK-S-NEXT: mullw r3, r3, r11 ; CHECK-S-NEXT: mullw r3, r3, r5 diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll --- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll @@ -353,9 +353,9 @@ ; CHECK-S-NEXT: stdu r1, -32(r1) ; CHECK-S-NEXT: .cfi_def_cfa_offset 32 ; CHECK-S-NEXT: .cfi_offset lr, 16 -; CHECK-S-NEXT: mtctr r5 ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: mr r12, r5 +; CHECK-S-NEXT: mtctr r5 ; CHECK-S-NEXT: extsw r3, r3 ; CHECK-S-NEXT: bctrl ; CHECK-S-NEXT: plwz r4, globalVar@PCREL(0), 1 @@ -383,8 +383,8 @@ ; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-S-NEXT: std r0, 16(r1) ; CHECK-S-NEXT: stdu r1, -48(r1) -; CHECK-S-NEXT: mtctr r5 ; CHECK-S-NEXT: mr r12, r5 +; CHECK-S-NEXT: mtctr r5 ; CHECK-S-NEXT: mr r30, r4 ; CHECK-S-NEXT: bctrl ; CHECK-S-NEXT: add r3, r3, r30 diff --git a/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll b/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll --- a/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll @@ -20,8 +20,8 @@ define dso_local signext i32 @jumptable(i32 signext %param) { ; CHECK-R-LABEL: jumptable: ; CHECK-R: # %bb.1: # %entry -; CHECK-R-NEXT: rldic r4, r4 ; CHECK-R-NEXT: paddi r5, 0, .LJTI0_0@PCREL, 1 +; CHECK-R-NEXT: rldic r4, r4 ; CHECK-R-NEXT: lwax r4, r4, r5 ; CHECK-R-NEXT: add r4, r4, r5 ; CHECK-R-NEXT: mtctr r4 @@ -35,8 +35,8 @@ ; CHECK-A-LE-NEXT: bctr ; CHECK-A-BE-LABEL: jumptable: ; CHECK-A-BE: # %bb.1: # %entry -; CHECK-A-BE-NEXT: rldic r4, r4 ; CHECK-A-BE-NEXT: paddi r5, 0, .LJTI0_0@PCREL, 1 +; CHECK-A-BE-NEXT: rldic r4, r4 ; CHECK-A-BE-NEXT: lwax r4, r4, r5 ; CHECK-A-BE-NEXT: mtctr r4 ; CHECK-A-BE-NEXT: bctr diff --git a/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll --- 
a/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll @@ -34,8 +34,7 @@ define dso_local void @TailCallLocalFuncPtr() local_unnamed_addr { ; CHECK-LABEL: TailCallLocalFuncPtr: -; CHECK: .localentry TailCallLocalFuncPtr, 1 -; CHECK-NEXT: # %bb.0: # %entry +; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r12, FuncLocal@PCREL(0), 1 ; CHECK-NEXT: mtctr r12 ; CHECK-NEXT: bctr @@ -48,8 +47,7 @@ define dso_local void @TailCallExtrnFuncPtr() local_unnamed_addr { ; CHECK-LABEL: TailCallExtrnFuncPtr: -; CHECK: .localentry TailCallExtrnFuncPtr, 1 -; CHECK-NEXT: # %bb.0: # %entry +; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, Func@got@pcrel(0), 1 ; CHECK-NEXT: .Lpcrel0: ; CHECK-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) @@ -65,8 +63,7 @@ define dso_local signext i32 @TailCallParamFuncPtr(i32 (...)* nocapture %passedfunc) local_unnamed_addr { ; CHECK-LABEL: TailCallParamFuncPtr: -; CHECK: .localentry TailCallParamFuncPtr, 1 -; CHECK-NEXT: # %bb.0: # %entry +; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mtctr r3 ; CHECK-NEXT: mr r12, r3 ; CHECK-NEXT: bctr @@ -79,8 +76,7 @@ define dso_local signext i32 @NoTailIndirectCall(i32 (...)* nocapture %passedfunc, i32 signext %a) local_unnamed_addr { ; CHECK-LABEL: NoTailIndirectCall: -; CHECK: .localentry NoTailIndirectCall, 1 -; CHECK-NEXT: # %bb.0: # %entry +; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset lr, 16 @@ -88,8 +84,8 @@ ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stdu r1, -48(r1) -; CHECK-NEXT: mtctr r3 ; CHECK-NEXT: mr r12, r3 +; CHECK-NEXT: mtctr r3 ; CHECK-NEXT: mr r30, r4 ; CHECK-NEXT: bctrl ; CHECK-NEXT: add r3, r3, r30 @@ -108,8 +104,7 @@ define dso_local signext i32 @TailCallDirect() local_unnamed_addr { ; CHECK-LABEL: TailCallDirect: -; CHECK: .localentry TailCallDirect, 1 -; CHECK-NEXT: # %bb.0: # %entry +; CHECK: # %bb.0: # %entry ; CHECK-NEXT: b 
Function@notoc ; CHECK-NEXT: #TC_RETURNd8 Function@notoc 0 entry: @@ -119,8 +114,7 @@ define dso_local signext i32 @NoTailCallDirect(i32 signext %a) local_unnamed_addr { ; CHECK-LABEL: NoTailCallDirect: -; CHECK: .localentry NoTailCallDirect, 1 -; CHECK-NEXT: # %bb.0: # %entry +; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset lr, 16 @@ -145,8 +139,7 @@ define dso_local signext i32 @TailCallDirectLocal() local_unnamed_addr { ; CHECK-LABEL: TailCallDirectLocal: -; CHECK: .localentry TailCallDirectLocal, 1 -; CHECK-NEXT: # %bb.0: # %entry +; CHECK: # %bb.0: # %entry ; CHECK-NEXT: b LocalFunction@notoc ; CHECK-NEXT: #TC_RETURNd8 LocalFunction@notoc 0 entry: @@ -156,8 +149,7 @@ define dso_local signext i32 @NoTailCallDirectLocal(i32 signext %a) local_unnamed_addr { ; CHECK-LABEL: NoTailCallDirectLocal: -; CHECK: .localentry NoTailCallDirectLocal, 1 -; CHECK-NEXT: # %bb.0: # %entry +; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset lr, 16 @@ -182,8 +174,7 @@ define dso_local signext i32 @TailCallAbs() local_unnamed_addr { ; CHECK-LABEL: TailCallAbs: -; CHECK: .localentry TailCallAbs, 1 -; CHECK-NEXT: # %bb.0: # %entry +; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li r3, 400 ; CHECK-NEXT: li r12, 400 ; CHECK-NEXT: mtctr r3 @@ -196,8 +187,7 @@ define dso_local signext i32 @NoTailCallAbs(i32 signext %a) local_unnamed_addr { ; CHECK-LABEL: NoTailCallAbs: -; CHECK: .localentry NoTailCallAbs, 1 -; CHECK-NEXT: # %bb.0: # %entry +; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset lr, 16 @@ -227,8 +217,7 @@ ; This function should be tail called and not inlined. 
define internal fastcc signext i32 @LocalFunction() unnamed_addr #0 { ; CHECK-LABEL: LocalFunction: -; CHECK: .localentry LocalFunction, 1 -; CHECK-NEXT: # %bb.0: # %entry +; CHECK: # %bb.0: # %entry ; CHECK-NEXT: #APP ; CHECK-NEXT: li r3, 42 ; CHECK-NEXT: #NO_APP diff --git a/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll b/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll --- a/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll @@ -16,11 +16,11 @@ ; CHECK-NEXT: lxv v4, 0(0) ; CHECK-NEXT: xxlxor v0, v0, v0 ; CHECK-NEXT: xxlxor v1, v1, v1 -; CHECK-NEXT: stfd f14, -144(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill ; CHECK-NEXT: xxlxor v2, v2, v2 ; CHECK-NEXT: li r6, 1 ; CHECK-NEXT: li r4, 16 +; CHECK-NEXT: stfd f14, -144(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill ; CHECK-NEXT: extswsli r3, r3, 3 ; CHECK-NEXT: xvmaddadp v1, v4, v1 ; CHECK-NEXT: lxvdsx v5, 0, r3 @@ -29,30 +29,30 @@ ; CHECK-NEXT: .LBB0_1: # %bb9 ; CHECK-NEXT: # ; CHECK-NEXT: addi r6, r6, 2 -; CHECK-NEXT: lxv vs1, -64(r5) -; CHECK-NEXT: lxv vs2, -16(r5) ; CHECK-NEXT: lxv vs0, 16(0) +; CHECK-NEXT: lxv vs1, -64(r5) +; CHECK-NEXT: xxlxor v7, v7, v7 ; CHECK-NEXT: vmr v9, v0 ; CHECK-NEXT: xxlxor v10, v10, v10 -; CHECK-NEXT: xxlxor v7, v7, v7 ; CHECK-NEXT: mulld r6, r6, r3 -; CHECK-NEXT: xvmaddadp v9, vs1, v2 -; CHECK-NEXT: xxlxor v8, v8, v8 -; CHECK-NEXT: xvmaddadp v10, vs2, v10 ; CHECK-NEXT: xvmaddadp v7, vs0, v5 ; CHECK-NEXT: xvmuldp v6, vs0, v2 +; CHECK-NEXT: lxv vs0, -16(r5) +; CHECK-NEXT: xvmaddadp v9, vs1, v2 +; CHECK-NEXT: xxlxor v8, v8, v8 ; CHECK-NEXT: xvmaddadp v7, v2, v2 ; CHECK-NEXT: xvmaddadp v6, v2, v2 ; CHECK-NEXT: lxvdsx v14, r6, r4 -; CHECK-NEXT: xvmaddadp v8, vs1, v8 ; CHECK-NEXT: li r6, 0 -; CHECK-NEXT: xvmuldp v11, vs2, v14 +; CHECK-NEXT: xvmaddadp v8, vs1, v8 +; CHECK-NEXT: xvmaddadp v10, vs0, v10 ; CHECK-NEXT: xvmuldp v3, vs1, v14 +; CHECK-NEXT: xvmuldp v11, vs0, 
v14 ; CHECK-NEXT: xvmuldp vs5, v14, v2 ; CHECK-NEXT: xvmuldp v13, v4, v14 +; CHECK-NEXT: xxlor vs0, v2, v2 ; CHECK-NEXT: vmr v12, v2 ; CHECK-NEXT: xxlor vs14, v10, v10 -; CHECK-NEXT: xxlor vs0, v2, v2 ; CHECK-NEXT: xxlor vs4, v2, v2 ; CHECK-NEXT: # kill: def $vsrp2 killed $vsrp2 def $uacc1 ; CHECK-NEXT: xxlor vs6, v6, v6 @@ -60,11 +60,11 @@ ; CHECK-NEXT: xxlor vs8, v12, v12 ; CHECK-NEXT: xxlor vs9, v13, v13 ; CHECK-NEXT: vmr v12, v1 -; CHECK-NEXT: xxlor vs15, v11, v11 -; CHECK-NEXT: vmr v10, v2 ; CHECK-NEXT: xxlor vs1, v3, v3 ; CHECK-NEXT: xxlor vs2, v8, v8 ; CHECK-NEXT: xxlor vs3, v9, v9 +; CHECK-NEXT: xxlor vs15, v11, v11 +; CHECK-NEXT: vmr v10, v2 ; CHECK-NEXT: xxlor vs10, v12, v12 ; CHECK-NEXT: xxlor vs11, v13, v13 ; CHECK-NEXT: xxmtacc acc1 @@ -72,8 +72,8 @@ ; CHECK-NEXT: xxlor vs13, v11, v11 ; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: xxmtacc acc2 -; CHECK-NEXT: xvf64gerpp acc0, vsp34, vs0 ; CHECK-NEXT: xxmtacc acc3 +; CHECK-NEXT: xvf64gerpp acc0, vsp34, vs0 ; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0 ; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0 ; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0 @@ -117,11 +117,11 @@ ; TRACKLIVE-NEXT: lxv v4, 0(0) ; TRACKLIVE-NEXT: xxlxor v0, v0, v0 ; TRACKLIVE-NEXT: xxlxor v1, v1, v1 -; TRACKLIVE-NEXT: stfd f14, -144(r1) # 8-byte Folded Spill -; TRACKLIVE-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill ; TRACKLIVE-NEXT: xxlxor v2, v2, v2 ; TRACKLIVE-NEXT: li r6, 1 ; TRACKLIVE-NEXT: li r4, 16 +; TRACKLIVE-NEXT: stfd f14, -144(r1) # 8-byte Folded Spill +; TRACKLIVE-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill ; TRACKLIVE-NEXT: extswsli r3, r3, 3 ; TRACKLIVE-NEXT: xvmaddadp v1, v4, v1 ; TRACKLIVE-NEXT: lxvdsx v5, 0, r3 @@ -131,33 +131,33 @@ ; TRACKLIVE-NEXT: # ; TRACKLIVE-NEXT: addi r6, r6, 2 ; TRACKLIVE-NEXT: lxv vs0, 16(0) -; TRACKLIVE-NEXT: xxlxor vs7, vs7, vs7 ; TRACKLIVE-NEXT: lxv vs1, -64(r5) -; TRACKLIVE-NEXT: lxv vs4, -16(r5) -; TRACKLIVE-NEXT: xxlxor vs12, vs12, vs12 +; TRACKLIVE-NEXT: xxlxor vs7, vs7, vs7 ; TRACKLIVE-NEXT: xxlor vs3, 
v0, v0 ; TRACKLIVE-NEXT: xxlxor vs2, vs2, vs2 +; TRACKLIVE-NEXT: xxlxor vs12, vs12, vs12 ; TRACKLIVE-NEXT: mulld r6, r6, r3 ; TRACKLIVE-NEXT: xxlor vs10, v2, v2 +; TRACKLIVE-NEXT: xxlor vs4, v2, v2 ; TRACKLIVE-NEXT: xxlor vs8, vs10, vs10 ; TRACKLIVE-NEXT: xxlor vs10, v1, v1 ; TRACKLIVE-NEXT: xvmaddadp vs7, vs0, v5 ; TRACKLIVE-NEXT: xvmuldp vs6, vs0, v2 -; TRACKLIVE-NEXT: xvmaddadp vs12, vs4, vs12 +; TRACKLIVE-NEXT: lxv vs0, -16(r5) ; TRACKLIVE-NEXT: xvmaddadp vs3, vs1, v2 ; TRACKLIVE-NEXT: xvmaddadp vs2, vs1, vs2 -; TRACKLIVE-NEXT: xxlor vs0, v2, v2 ; TRACKLIVE-NEXT: lxvdsx v6, r6, r4 ; TRACKLIVE-NEXT: li r6, 0 ; TRACKLIVE-NEXT: xvmaddadp vs7, v2, v2 ; TRACKLIVE-NEXT: xvmaddadp vs6, v2, v2 -; TRACKLIVE-NEXT: xxlor vs14, vs12, vs12 -; TRACKLIVE-NEXT: xxlor vs12, v2, v2 +; TRACKLIVE-NEXT: xvmaddadp vs12, vs0, vs12 ; TRACKLIVE-NEXT: xvmuldp v3, vs1, v6 ; TRACKLIVE-NEXT: xvmuldp vs11, v4, v6 -; TRACKLIVE-NEXT: xvmuldp vs13, vs4, v6 +; TRACKLIVE-NEXT: xvmuldp vs13, vs0, v6 ; TRACKLIVE-NEXT: xvmuldp vs5, v6, v2 -; TRACKLIVE-NEXT: xxlor vs4, v2, v2 +; TRACKLIVE-NEXT: xxlor vs0, v2, v2 +; TRACKLIVE-NEXT: xxlor vs14, vs12, vs12 +; TRACKLIVE-NEXT: xxlor vs12, v2, v2 ; TRACKLIVE-NEXT: xxlor vs1, v3, v3 ; TRACKLIVE-NEXT: xxlor vs9, vs11, vs11 ; TRACKLIVE-NEXT: xxlor vs15, vs13, vs13 diff --git a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll --- a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll @@ -66,9 +66,9 @@ ; LE-P10-NEXT: clrldi r3, r3, 32 ; LE-P10-NEXT: addi r1, r1, 64 ; LE-P10-NEXT: ld r0, 16(r1) -; LE-P10-NEXT: mtlr r0 ; LE-P10-NEXT: hashchk r0, -24(r1) ; LE-P10-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; LE-P10-NEXT: mtlr r0 ; LE-P10-NEXT: blr ; ; LE-P9-LABEL: caller: @@ -189,8 +189,8 @@ ; BE-P10-NEXT: clrldi r3, r3, 32 ; BE-P10-NEXT: addi r1, r1, 144 ; BE-P10-NEXT: ld r0, 16(r1) -; BE-P10-NEXT: mtlr r0 ; BE-P10-NEXT: hashchk r0, -24(r1) +; BE-P10-NEXT: 
mtlr r0 ; BE-P10-NEXT: blr ; ; BE-P9-LABEL: caller: @@ -244,9 +244,9 @@ ; LE-P10-PRIV-NEXT: clrldi r3, r3, 32 ; LE-P10-PRIV-NEXT: addi r1, r1, 64 ; LE-P10-PRIV-NEXT: ld r0, 16(r1) -; LE-P10-PRIV-NEXT: mtlr r0 ; LE-P10-PRIV-NEXT: hashchkp r0, -24(r1) ; LE-P10-PRIV-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; LE-P10-PRIV-NEXT: mtlr r0 ; LE-P10-PRIV-NEXT: blr ; ; LE-P9-PRIV-LABEL: caller: @@ -302,8 +302,8 @@ ; BE-P10-PRIV-NEXT: clrldi r3, r3, 32 ; BE-P10-PRIV-NEXT: addi r1, r1, 144 ; BE-P10-PRIV-NEXT: ld r0, 16(r1) -; BE-P10-PRIV-NEXT: mtlr r0 ; BE-P10-PRIV-NEXT: hashchkp r0, -24(r1) +; BE-P10-PRIV-NEXT: mtlr r0 ; BE-P10-PRIV-NEXT: blr ; ; BE-P9-PRIV-LABEL: caller: @@ -365,30 +365,18 @@ ; LE-P10-NEXT: lwz r4, 12(r3) ; LE-P10-NEXT: std r14, 256(r1) # 8-byte Folded Spill ; LE-P10-NEXT: std r15, 264(r1) # 8-byte Folded Spill -; LE-P10-NEXT: stxv v20, 64(r1) # 16-byte Folded Spill -; LE-P10-NEXT: stxv v21, 80(r1) # 16-byte Folded Spill -; LE-P10-NEXT: stxv v22, 96(r1) # 16-byte Folded Spill ; LE-P10-NEXT: std r16, 272(r1) # 8-byte Folded Spill ; LE-P10-NEXT: std r17, 280(r1) # 8-byte Folded Spill -; LE-P10-NEXT: stxv v23, 112(r1) # 16-byte Folded Spill ; LE-P10-NEXT: std r18, 288(r1) # 8-byte Folded Spill ; LE-P10-NEXT: std r19, 296(r1) # 8-byte Folded Spill -; LE-P10-NEXT: stxv v24, 128(r1) # 16-byte Folded Spill -; LE-P10-NEXT: stxv v25, 144(r1) # 16-byte Folded Spill ; LE-P10-NEXT: std r20, 304(r1) # 8-byte Folded Spill ; LE-P10-NEXT: std r21, 312(r1) # 8-byte Folded Spill -; LE-P10-NEXT: stxv v26, 160(r1) # 16-byte Folded Spill ; LE-P10-NEXT: std r22, 320(r1) # 8-byte Folded Spill ; LE-P10-NEXT: std r23, 328(r1) # 8-byte Folded Spill -; LE-P10-NEXT: stxv v27, 176(r1) # 16-byte Folded Spill -; LE-P10-NEXT: stxv v28, 192(r1) # 16-byte Folded Spill ; LE-P10-NEXT: std r24, 336(r1) # 8-byte Folded Spill ; LE-P10-NEXT: std r25, 344(r1) # 8-byte Folded Spill -; LE-P10-NEXT: stxv v29, 208(r1) # 16-byte Folded Spill ; LE-P10-NEXT: std r26, 352(r1) # 8-byte Folded Spill ; 
LE-P10-NEXT: std r27, 360(r1) # 8-byte Folded Spill -; LE-P10-NEXT: stxv v30, 224(r1) # 16-byte Folded Spill -; LE-P10-NEXT: stxv v31, 240(r1) # 16-byte Folded Spill ; LE-P10-NEXT: std r28, 368(r1) # 8-byte Folded Spill ; LE-P10-NEXT: std r29, 376(r1) # 8-byte Folded Spill ; LE-P10-NEXT: std r30, 384(r1) # 8-byte Folded Spill @@ -411,6 +399,18 @@ ; LE-P10-NEXT: stfd f29, 520(r1) # 8-byte Folded Spill ; LE-P10-NEXT: stfd f30, 528(r1) # 8-byte Folded Spill ; LE-P10-NEXT: stfd f31, 536(r1) # 8-byte Folded Spill +; LE-P10-NEXT: stxv v20, 64(r1) # 16-byte Folded Spill +; LE-P10-NEXT: stxv v21, 80(r1) # 16-byte Folded Spill +; LE-P10-NEXT: stxv v22, 96(r1) # 16-byte Folded Spill +; LE-P10-NEXT: stxv v23, 112(r1) # 16-byte Folded Spill +; LE-P10-NEXT: stxv v24, 128(r1) # 16-byte Folded Spill +; LE-P10-NEXT: stxv v25, 144(r1) # 16-byte Folded Spill +; LE-P10-NEXT: stxv v26, 160(r1) # 16-byte Folded Spill +; LE-P10-NEXT: stxv v27, 176(r1) # 16-byte Folded Spill +; LE-P10-NEXT: stxv v28, 192(r1) # 16-byte Folded Spill +; LE-P10-NEXT: stxv v29, 208(r1) # 16-byte Folded Spill +; LE-P10-NEXT: stxv v30, 224(r1) # 16-byte Folded Spill +; LE-P10-NEXT: stxv v31, 240(r1) # 16-byte Folded Spill ; LE-P10-NEXT: std r3, 40(r1) # 8-byte Folded Spill ; LE-P10-NEXT: stw r4, 52(r1) ; LE-P10-NEXT: #APP @@ -436,40 +436,40 @@ ; LE-P10-NEXT: lfd f29, 520(r1) # 8-byte Folded Reload ; LE-P10-NEXT: lfd f28, 512(r1) # 8-byte Folded Reload ; LE-P10-NEXT: lfd f27, 504(r1) # 8-byte Folded Reload +; LE-P10-NEXT: lfd f26, 496(r1) # 8-byte Folded Reload +; LE-P10-NEXT: lfd f25, 488(r1) # 8-byte Folded Reload +; LE-P10-NEXT: lfd f24, 480(r1) # 8-byte Folded Reload +; LE-P10-NEXT: lfd f23, 472(r1) # 8-byte Folded Reload +; LE-P10-NEXT: lfd f22, 464(r1) # 8-byte Folded Reload +; LE-P10-NEXT: lfd f21, 456(r1) # 8-byte Folded Reload +; LE-P10-NEXT: lfd f20, 448(r1) # 8-byte Folded Reload +; LE-P10-NEXT: lfd f19, 440(r1) # 8-byte Folded Reload +; LE-P10-NEXT: lfd f18, 432(r1) # 8-byte Folded Reload +; 
LE-P10-NEXT: lfd f17, 424(r1) # 8-byte Folded Reload +; LE-P10-NEXT: lfd f16, 416(r1) # 8-byte Folded Reload +; LE-P10-NEXT: lfd f15, 408(r1) # 8-byte Folded Reload +; LE-P10-NEXT: lfd f14, 400(r1) # 8-byte Folded Reload ; LE-P10-NEXT: ld r31, 392(r1) # 8-byte Folded Reload ; LE-P10-NEXT: ld r30, 384(r1) # 8-byte Folded Reload ; LE-P10-NEXT: ld r29, 376(r1) # 8-byte Folded Reload -; LE-P10-NEXT: lfd f26, 496(r1) # 8-byte Folded Reload ; LE-P10-NEXT: ld r28, 368(r1) # 8-byte Folded Reload ; LE-P10-NEXT: ld r27, 360(r1) # 8-byte Folded Reload ; LE-P10-NEXT: ld r26, 352(r1) # 8-byte Folded Reload -; LE-P10-NEXT: lfd f25, 488(r1) # 8-byte Folded Reload ; LE-P10-NEXT: ld r25, 344(r1) # 8-byte Folded Reload ; LE-P10-NEXT: ld r24, 336(r1) # 8-byte Folded Reload ; LE-P10-NEXT: ld r23, 328(r1) # 8-byte Folded Reload -; LE-P10-NEXT: lfd f24, 480(r1) # 8-byte Folded Reload ; LE-P10-NEXT: ld r22, 320(r1) # 8-byte Folded Reload ; LE-P10-NEXT: ld r21, 312(r1) # 8-byte Folded Reload -; LE-P10-NEXT: lwz r4, 16(r4) -; LE-P10-NEXT: add r3, r4, r3 -; LE-P10-NEXT: lfd f23, 472(r1) # 8-byte Folded Reload -; LE-P10-NEXT: lfd f22, 464(r1) # 8-byte Folded Reload ; LE-P10-NEXT: ld r20, 304(r1) # 8-byte Folded Reload ; LE-P10-NEXT: ld r19, 296(r1) # 8-byte Folded Reload ; LE-P10-NEXT: ld r18, 288(r1) # 8-byte Folded Reload ; LE-P10-NEXT: ld r17, 280(r1) # 8-byte Folded Reload ; LE-P10-NEXT: ld r16, 272(r1) # 8-byte Folded Reload ; LE-P10-NEXT: ld r15, 264(r1) # 8-byte Folded Reload -; LE-P10-NEXT: ld r14, 256(r1) # 8-byte Folded Reload -; LE-P10-NEXT: lfd f21, 456(r1) # 8-byte Folded Reload -; LE-P10-NEXT: lfd f20, 448(r1) # 8-byte Folded Reload -; LE-P10-NEXT: lfd f19, 440(r1) # 8-byte Folded Reload -; LE-P10-NEXT: lfd f18, 432(r1) # 8-byte Folded Reload -; LE-P10-NEXT: lfd f17, 424(r1) # 8-byte Folded Reload -; LE-P10-NEXT: lfd f16, 416(r1) # 8-byte Folded Reload -; LE-P10-NEXT: lfd f15, 408(r1) # 8-byte Folded Reload -; LE-P10-NEXT: lfd f14, 400(r1) # 8-byte Folded Reload +; LE-P10-NEXT: 
lwz r4, 16(r4) +; LE-P10-NEXT: add r3, r4, r3 ; LE-P10-NEXT: clrldi r3, r3, 32 +; LE-P10-NEXT: ld r14, 256(r1) # 8-byte Folded Reload ; LE-P10-NEXT: addi r1, r1, 544 ; LE-P10-NEXT: ld r0, 16(r1) ; LE-P10-NEXT: lwz r12, 8(r1) @@ -1177,30 +1177,18 @@ ; BE-P10-NEXT: lwz r4, 12(r3) ; BE-P10-NEXT: std r14, 336(r1) # 8-byte Folded Spill ; BE-P10-NEXT: std r15, 344(r1) # 8-byte Folded Spill -; BE-P10-NEXT: stxv v20, 144(r1) # 16-byte Folded Spill -; BE-P10-NEXT: stxv v21, 160(r1) # 16-byte Folded Spill -; BE-P10-NEXT: stxv v22, 176(r1) # 16-byte Folded Spill ; BE-P10-NEXT: std r16, 352(r1) # 8-byte Folded Spill ; BE-P10-NEXT: std r17, 360(r1) # 8-byte Folded Spill -; BE-P10-NEXT: stxv v23, 192(r1) # 16-byte Folded Spill ; BE-P10-NEXT: std r18, 368(r1) # 8-byte Folded Spill ; BE-P10-NEXT: std r19, 376(r1) # 8-byte Folded Spill -; BE-P10-NEXT: stxv v24, 208(r1) # 16-byte Folded Spill -; BE-P10-NEXT: stxv v25, 224(r1) # 16-byte Folded Spill ; BE-P10-NEXT: std r20, 384(r1) # 8-byte Folded Spill ; BE-P10-NEXT: std r21, 392(r1) # 8-byte Folded Spill -; BE-P10-NEXT: stxv v26, 240(r1) # 16-byte Folded Spill ; BE-P10-NEXT: std r22, 400(r1) # 8-byte Folded Spill ; BE-P10-NEXT: std r23, 408(r1) # 8-byte Folded Spill -; BE-P10-NEXT: stxv v27, 256(r1) # 16-byte Folded Spill -; BE-P10-NEXT: stxv v28, 272(r1) # 16-byte Folded Spill ; BE-P10-NEXT: std r24, 416(r1) # 8-byte Folded Spill ; BE-P10-NEXT: std r25, 424(r1) # 8-byte Folded Spill -; BE-P10-NEXT: stxv v29, 288(r1) # 16-byte Folded Spill ; BE-P10-NEXT: std r26, 432(r1) # 8-byte Folded Spill ; BE-P10-NEXT: std r27, 440(r1) # 8-byte Folded Spill -; BE-P10-NEXT: stxv v30, 304(r1) # 16-byte Folded Spill -; BE-P10-NEXT: stxv v31, 320(r1) # 16-byte Folded Spill ; BE-P10-NEXT: std r28, 448(r1) # 8-byte Folded Spill ; BE-P10-NEXT: std r29, 456(r1) # 8-byte Folded Spill ; BE-P10-NEXT: std r30, 464(r1) # 8-byte Folded Spill @@ -1223,6 +1211,18 @@ ; BE-P10-NEXT: stfd f29, 600(r1) # 8-byte Folded Spill ; BE-P10-NEXT: stfd f30, 608(r1) # 
8-byte Folded Spill ; BE-P10-NEXT: stfd f31, 616(r1) # 8-byte Folded Spill +; BE-P10-NEXT: stxv v20, 144(r1) # 16-byte Folded Spill +; BE-P10-NEXT: stxv v21, 160(r1) # 16-byte Folded Spill +; BE-P10-NEXT: stxv v22, 176(r1) # 16-byte Folded Spill +; BE-P10-NEXT: stxv v23, 192(r1) # 16-byte Folded Spill +; BE-P10-NEXT: stxv v24, 208(r1) # 16-byte Folded Spill +; BE-P10-NEXT: stxv v25, 224(r1) # 16-byte Folded Spill +; BE-P10-NEXT: stxv v26, 240(r1) # 16-byte Folded Spill +; BE-P10-NEXT: stxv v27, 256(r1) # 16-byte Folded Spill +; BE-P10-NEXT: stxv v28, 272(r1) # 16-byte Folded Spill +; BE-P10-NEXT: stxv v29, 288(r1) # 16-byte Folded Spill +; BE-P10-NEXT: stxv v30, 304(r1) # 16-byte Folded Spill +; BE-P10-NEXT: stxv v31, 320(r1) # 16-byte Folded Spill ; BE-P10-NEXT: std r3, 120(r1) # 8-byte Folded Spill ; BE-P10-NEXT: stw r4, 132(r1) ; BE-P10-NEXT: #APP @@ -1249,40 +1249,40 @@ ; BE-P10-NEXT: lfd f29, 600(r1) # 8-byte Folded Reload ; BE-P10-NEXT: lfd f28, 592(r1) # 8-byte Folded Reload ; BE-P10-NEXT: lfd f27, 584(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f26, 576(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f25, 568(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f24, 560(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f23, 552(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f22, 544(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f21, 536(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f20, 528(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f19, 520(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f18, 512(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f17, 504(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f16, 496(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f15, 488(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f14, 480(r1) # 8-byte Folded Reload ; BE-P10-NEXT: ld r31, 472(r1) # 8-byte Folded Reload ; BE-P10-NEXT: ld r30, 464(r1) # 8-byte Folded Reload ; BE-P10-NEXT: ld r29, 456(r1) # 8-byte Folded Reload -; BE-P10-NEXT: lfd f26, 576(r1) # 8-byte Folded Reload ; 
BE-P10-NEXT: ld r28, 448(r1) # 8-byte Folded Reload ; BE-P10-NEXT: ld r27, 440(r1) # 8-byte Folded Reload ; BE-P10-NEXT: ld r26, 432(r1) # 8-byte Folded Reload -; BE-P10-NEXT: lfd f25, 568(r1) # 8-byte Folded Reload ; BE-P10-NEXT: ld r25, 424(r1) # 8-byte Folded Reload ; BE-P10-NEXT: ld r24, 416(r1) # 8-byte Folded Reload ; BE-P10-NEXT: ld r23, 408(r1) # 8-byte Folded Reload -; BE-P10-NEXT: lfd f24, 560(r1) # 8-byte Folded Reload ; BE-P10-NEXT: ld r22, 400(r1) # 8-byte Folded Reload ; BE-P10-NEXT: ld r21, 392(r1) # 8-byte Folded Reload -; BE-P10-NEXT: lwz r4, 16(r4) -; BE-P10-NEXT: add r3, r4, r3 -; BE-P10-NEXT: lfd f23, 552(r1) # 8-byte Folded Reload -; BE-P10-NEXT: lfd f22, 544(r1) # 8-byte Folded Reload ; BE-P10-NEXT: ld r20, 384(r1) # 8-byte Folded Reload ; BE-P10-NEXT: ld r19, 376(r1) # 8-byte Folded Reload ; BE-P10-NEXT: ld r18, 368(r1) # 8-byte Folded Reload ; BE-P10-NEXT: ld r17, 360(r1) # 8-byte Folded Reload ; BE-P10-NEXT: ld r16, 352(r1) # 8-byte Folded Reload ; BE-P10-NEXT: ld r15, 344(r1) # 8-byte Folded Reload -; BE-P10-NEXT: ld r14, 336(r1) # 8-byte Folded Reload -; BE-P10-NEXT: lfd f21, 536(r1) # 8-byte Folded Reload -; BE-P10-NEXT: lfd f20, 528(r1) # 8-byte Folded Reload -; BE-P10-NEXT: lfd f19, 520(r1) # 8-byte Folded Reload -; BE-P10-NEXT: lfd f18, 512(r1) # 8-byte Folded Reload -; BE-P10-NEXT: lfd f17, 504(r1) # 8-byte Folded Reload -; BE-P10-NEXT: lfd f16, 496(r1) # 8-byte Folded Reload -; BE-P10-NEXT: lfd f15, 488(r1) # 8-byte Folded Reload -; BE-P10-NEXT: lfd f14, 480(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lwz r4, 16(r4) +; BE-P10-NEXT: add r3, r4, r3 ; BE-P10-NEXT: clrldi r3, r3, 32 +; BE-P10-NEXT: ld r14, 336(r1) # 8-byte Folded Reload ; BE-P10-NEXT: addi r1, r1, 624 ; BE-P10-NEXT: ld r0, 16(r1) ; BE-P10-NEXT: lwz r12, 8(r1) @@ -1582,30 +1582,18 @@ ; LE-P10-PRIV-NEXT: lwz r4, 12(r3) ; LE-P10-PRIV-NEXT: std r14, 256(r1) # 8-byte Folded Spill ; LE-P10-PRIV-NEXT: std r15, 264(r1) # 8-byte Folded Spill -; LE-P10-PRIV-NEXT: stxv v20, 64(r1) 
# 16-byte Folded Spill -; LE-P10-PRIV-NEXT: stxv v21, 80(r1) # 16-byte Folded Spill -; LE-P10-PRIV-NEXT: stxv v22, 96(r1) # 16-byte Folded Spill ; LE-P10-PRIV-NEXT: std r16, 272(r1) # 8-byte Folded Spill ; LE-P10-PRIV-NEXT: std r17, 280(r1) # 8-byte Folded Spill -; LE-P10-PRIV-NEXT: stxv v23, 112(r1) # 16-byte Folded Spill ; LE-P10-PRIV-NEXT: std r18, 288(r1) # 8-byte Folded Spill ; LE-P10-PRIV-NEXT: std r19, 296(r1) # 8-byte Folded Spill -; LE-P10-PRIV-NEXT: stxv v24, 128(r1) # 16-byte Folded Spill -; LE-P10-PRIV-NEXT: stxv v25, 144(r1) # 16-byte Folded Spill ; LE-P10-PRIV-NEXT: std r20, 304(r1) # 8-byte Folded Spill ; LE-P10-PRIV-NEXT: std r21, 312(r1) # 8-byte Folded Spill -; LE-P10-PRIV-NEXT: stxv v26, 160(r1) # 16-byte Folded Spill ; LE-P10-PRIV-NEXT: std r22, 320(r1) # 8-byte Folded Spill ; LE-P10-PRIV-NEXT: std r23, 328(r1) # 8-byte Folded Spill -; LE-P10-PRIV-NEXT: stxv v27, 176(r1) # 16-byte Folded Spill -; LE-P10-PRIV-NEXT: stxv v28, 192(r1) # 16-byte Folded Spill ; LE-P10-PRIV-NEXT: std r24, 336(r1) # 8-byte Folded Spill ; LE-P10-PRIV-NEXT: std r25, 344(r1) # 8-byte Folded Spill -; LE-P10-PRIV-NEXT: stxv v29, 208(r1) # 16-byte Folded Spill ; LE-P10-PRIV-NEXT: std r26, 352(r1) # 8-byte Folded Spill ; LE-P10-PRIV-NEXT: std r27, 360(r1) # 8-byte Folded Spill -; LE-P10-PRIV-NEXT: stxv v30, 224(r1) # 16-byte Folded Spill -; LE-P10-PRIV-NEXT: stxv v31, 240(r1) # 16-byte Folded Spill ; LE-P10-PRIV-NEXT: std r28, 368(r1) # 8-byte Folded Spill ; LE-P10-PRIV-NEXT: std r29, 376(r1) # 8-byte Folded Spill ; LE-P10-PRIV-NEXT: std r30, 384(r1) # 8-byte Folded Spill @@ -1628,6 +1616,18 @@ ; LE-P10-PRIV-NEXT: stfd f29, 520(r1) # 8-byte Folded Spill ; LE-P10-PRIV-NEXT: stfd f30, 528(r1) # 8-byte Folded Spill ; LE-P10-PRIV-NEXT: stfd f31, 536(r1) # 8-byte Folded Spill +; LE-P10-PRIV-NEXT: stxv v20, 64(r1) # 16-byte Folded Spill +; LE-P10-PRIV-NEXT: stxv v21, 80(r1) # 16-byte Folded Spill +; LE-P10-PRIV-NEXT: stxv v22, 96(r1) # 16-byte Folded Spill +; LE-P10-PRIV-NEXT: stxv 
v23, 112(r1) # 16-byte Folded Spill +; LE-P10-PRIV-NEXT: stxv v24, 128(r1) # 16-byte Folded Spill +; LE-P10-PRIV-NEXT: stxv v25, 144(r1) # 16-byte Folded Spill +; LE-P10-PRIV-NEXT: stxv v26, 160(r1) # 16-byte Folded Spill +; LE-P10-PRIV-NEXT: stxv v27, 176(r1) # 16-byte Folded Spill +; LE-P10-PRIV-NEXT: stxv v28, 192(r1) # 16-byte Folded Spill +; LE-P10-PRIV-NEXT: stxv v29, 208(r1) # 16-byte Folded Spill +; LE-P10-PRIV-NEXT: stxv v30, 224(r1) # 16-byte Folded Spill +; LE-P10-PRIV-NEXT: stxv v31, 240(r1) # 16-byte Folded Spill ; LE-P10-PRIV-NEXT: std r3, 40(r1) # 8-byte Folded Spill ; LE-P10-PRIV-NEXT: stw r4, 52(r1) ; LE-P10-PRIV-NEXT: #APP @@ -1653,40 +1653,40 @@ ; LE-P10-PRIV-NEXT: lfd f29, 520(r1) # 8-byte Folded Reload ; LE-P10-PRIV-NEXT: lfd f28, 512(r1) # 8-byte Folded Reload ; LE-P10-PRIV-NEXT: lfd f27, 504(r1) # 8-byte Folded Reload +; LE-P10-PRIV-NEXT: lfd f26, 496(r1) # 8-byte Folded Reload +; LE-P10-PRIV-NEXT: lfd f25, 488(r1) # 8-byte Folded Reload +; LE-P10-PRIV-NEXT: lfd f24, 480(r1) # 8-byte Folded Reload +; LE-P10-PRIV-NEXT: lfd f23, 472(r1) # 8-byte Folded Reload +; LE-P10-PRIV-NEXT: lfd f22, 464(r1) # 8-byte Folded Reload +; LE-P10-PRIV-NEXT: lfd f21, 456(r1) # 8-byte Folded Reload +; LE-P10-PRIV-NEXT: lfd f20, 448(r1) # 8-byte Folded Reload +; LE-P10-PRIV-NEXT: lfd f19, 440(r1) # 8-byte Folded Reload +; LE-P10-PRIV-NEXT: lfd f18, 432(r1) # 8-byte Folded Reload +; LE-P10-PRIV-NEXT: lfd f17, 424(r1) # 8-byte Folded Reload +; LE-P10-PRIV-NEXT: lfd f16, 416(r1) # 8-byte Folded Reload +; LE-P10-PRIV-NEXT: lfd f15, 408(r1) # 8-byte Folded Reload +; LE-P10-PRIV-NEXT: lfd f14, 400(r1) # 8-byte Folded Reload ; LE-P10-PRIV-NEXT: ld r31, 392(r1) # 8-byte Folded Reload ; LE-P10-PRIV-NEXT: ld r30, 384(r1) # 8-byte Folded Reload ; LE-P10-PRIV-NEXT: ld r29, 376(r1) # 8-byte Folded Reload -; LE-P10-PRIV-NEXT: lfd f26, 496(r1) # 8-byte Folded Reload ; LE-P10-PRIV-NEXT: ld r28, 368(r1) # 8-byte Folded Reload ; LE-P10-PRIV-NEXT: ld r27, 360(r1) # 8-byte Folded 
Reload ; LE-P10-PRIV-NEXT: ld r26, 352(r1) # 8-byte Folded Reload -; LE-P10-PRIV-NEXT: lfd f25, 488(r1) # 8-byte Folded Reload ; LE-P10-PRIV-NEXT: ld r25, 344(r1) # 8-byte Folded Reload ; LE-P10-PRIV-NEXT: ld r24, 336(r1) # 8-byte Folded Reload ; LE-P10-PRIV-NEXT: ld r23, 328(r1) # 8-byte Folded Reload -; LE-P10-PRIV-NEXT: lfd f24, 480(r1) # 8-byte Folded Reload ; LE-P10-PRIV-NEXT: ld r22, 320(r1) # 8-byte Folded Reload ; LE-P10-PRIV-NEXT: ld r21, 312(r1) # 8-byte Folded Reload -; LE-P10-PRIV-NEXT: lwz r4, 16(r4) -; LE-P10-PRIV-NEXT: add r3, r4, r3 -; LE-P10-PRIV-NEXT: lfd f23, 472(r1) # 8-byte Folded Reload -; LE-P10-PRIV-NEXT: lfd f22, 464(r1) # 8-byte Folded Reload ; LE-P10-PRIV-NEXT: ld r20, 304(r1) # 8-byte Folded Reload ; LE-P10-PRIV-NEXT: ld r19, 296(r1) # 8-byte Folded Reload ; LE-P10-PRIV-NEXT: ld r18, 288(r1) # 8-byte Folded Reload ; LE-P10-PRIV-NEXT: ld r17, 280(r1) # 8-byte Folded Reload ; LE-P10-PRIV-NEXT: ld r16, 272(r1) # 8-byte Folded Reload ; LE-P10-PRIV-NEXT: ld r15, 264(r1) # 8-byte Folded Reload -; LE-P10-PRIV-NEXT: ld r14, 256(r1) # 8-byte Folded Reload -; LE-P10-PRIV-NEXT: lfd f21, 456(r1) # 8-byte Folded Reload -; LE-P10-PRIV-NEXT: lfd f20, 448(r1) # 8-byte Folded Reload -; LE-P10-PRIV-NEXT: lfd f19, 440(r1) # 8-byte Folded Reload -; LE-P10-PRIV-NEXT: lfd f18, 432(r1) # 8-byte Folded Reload -; LE-P10-PRIV-NEXT: lfd f17, 424(r1) # 8-byte Folded Reload -; LE-P10-PRIV-NEXT: lfd f16, 416(r1) # 8-byte Folded Reload -; LE-P10-PRIV-NEXT: lfd f15, 408(r1) # 8-byte Folded Reload -; LE-P10-PRIV-NEXT: lfd f14, 400(r1) # 8-byte Folded Reload +; LE-P10-PRIV-NEXT: lwz r4, 16(r4) +; LE-P10-PRIV-NEXT: add r3, r4, r3 ; LE-P10-PRIV-NEXT: clrldi r3, r3, 32 +; LE-P10-PRIV-NEXT: ld r14, 256(r1) # 8-byte Folded Reload ; LE-P10-PRIV-NEXT: addi r1, r1, 544 ; LE-P10-PRIV-NEXT: ld r0, 16(r1) ; LE-P10-PRIV-NEXT: lwz r12, 8(r1) @@ -1986,30 +1986,18 @@ ; BE-P10-PRIV-NEXT: lwz r4, 12(r3) ; BE-P10-PRIV-NEXT: std r14, 336(r1) # 8-byte Folded Spill ; BE-P10-PRIV-NEXT: std 
r15, 344(r1) # 8-byte Folded Spill -; BE-P10-PRIV-NEXT: stxv v20, 144(r1) # 16-byte Folded Spill -; BE-P10-PRIV-NEXT: stxv v21, 160(r1) # 16-byte Folded Spill -; BE-P10-PRIV-NEXT: stxv v22, 176(r1) # 16-byte Folded Spill ; BE-P10-PRIV-NEXT: std r16, 352(r1) # 8-byte Folded Spill ; BE-P10-PRIV-NEXT: std r17, 360(r1) # 8-byte Folded Spill -; BE-P10-PRIV-NEXT: stxv v23, 192(r1) # 16-byte Folded Spill ; BE-P10-PRIV-NEXT: std r18, 368(r1) # 8-byte Folded Spill ; BE-P10-PRIV-NEXT: std r19, 376(r1) # 8-byte Folded Spill -; BE-P10-PRIV-NEXT: stxv v24, 208(r1) # 16-byte Folded Spill -; BE-P10-PRIV-NEXT: stxv v25, 224(r1) # 16-byte Folded Spill ; BE-P10-PRIV-NEXT: std r20, 384(r1) # 8-byte Folded Spill ; BE-P10-PRIV-NEXT: std r21, 392(r1) # 8-byte Folded Spill -; BE-P10-PRIV-NEXT: stxv v26, 240(r1) # 16-byte Folded Spill ; BE-P10-PRIV-NEXT: std r22, 400(r1) # 8-byte Folded Spill ; BE-P10-PRIV-NEXT: std r23, 408(r1) # 8-byte Folded Spill -; BE-P10-PRIV-NEXT: stxv v27, 256(r1) # 16-byte Folded Spill -; BE-P10-PRIV-NEXT: stxv v28, 272(r1) # 16-byte Folded Spill ; BE-P10-PRIV-NEXT: std r24, 416(r1) # 8-byte Folded Spill ; BE-P10-PRIV-NEXT: std r25, 424(r1) # 8-byte Folded Spill -; BE-P10-PRIV-NEXT: stxv v29, 288(r1) # 16-byte Folded Spill ; BE-P10-PRIV-NEXT: std r26, 432(r1) # 8-byte Folded Spill ; BE-P10-PRIV-NEXT: std r27, 440(r1) # 8-byte Folded Spill -; BE-P10-PRIV-NEXT: stxv v30, 304(r1) # 16-byte Folded Spill -; BE-P10-PRIV-NEXT: stxv v31, 320(r1) # 16-byte Folded Spill ; BE-P10-PRIV-NEXT: std r28, 448(r1) # 8-byte Folded Spill ; BE-P10-PRIV-NEXT: std r29, 456(r1) # 8-byte Folded Spill ; BE-P10-PRIV-NEXT: std r30, 464(r1) # 8-byte Folded Spill @@ -2032,6 +2020,18 @@ ; BE-P10-PRIV-NEXT: stfd f29, 600(r1) # 8-byte Folded Spill ; BE-P10-PRIV-NEXT: stfd f30, 608(r1) # 8-byte Folded Spill ; BE-P10-PRIV-NEXT: stfd f31, 616(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: stxv v20, 144(r1) # 16-byte Folded Spill +; BE-P10-PRIV-NEXT: stxv v21, 160(r1) # 16-byte Folded Spill +; 
BE-P10-PRIV-NEXT: stxv v22, 176(r1) # 16-byte Folded Spill +; BE-P10-PRIV-NEXT: stxv v23, 192(r1) # 16-byte Folded Spill +; BE-P10-PRIV-NEXT: stxv v24, 208(r1) # 16-byte Folded Spill +; BE-P10-PRIV-NEXT: stxv v25, 224(r1) # 16-byte Folded Spill +; BE-P10-PRIV-NEXT: stxv v26, 240(r1) # 16-byte Folded Spill +; BE-P10-PRIV-NEXT: stxv v27, 256(r1) # 16-byte Folded Spill +; BE-P10-PRIV-NEXT: stxv v28, 272(r1) # 16-byte Folded Spill +; BE-P10-PRIV-NEXT: stxv v29, 288(r1) # 16-byte Folded Spill +; BE-P10-PRIV-NEXT: stxv v30, 304(r1) # 16-byte Folded Spill +; BE-P10-PRIV-NEXT: stxv v31, 320(r1) # 16-byte Folded Spill ; BE-P10-PRIV-NEXT: std r3, 120(r1) # 8-byte Folded Spill ; BE-P10-PRIV-NEXT: stw r4, 132(r1) ; BE-P10-PRIV-NEXT: #APP @@ -2058,40 +2058,40 @@ ; BE-P10-PRIV-NEXT: lfd f29, 600(r1) # 8-byte Folded Reload ; BE-P10-PRIV-NEXT: lfd f28, 592(r1) # 8-byte Folded Reload ; BE-P10-PRIV-NEXT: lfd f27, 584(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f26, 576(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f25, 568(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f24, 560(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f23, 552(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f22, 544(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f21, 536(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f20, 528(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f19, 520(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f18, 512(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f17, 504(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f16, 496(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f15, 488(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f14, 480(r1) # 8-byte Folded Reload ; BE-P10-PRIV-NEXT: ld r31, 472(r1) # 8-byte Folded Reload ; BE-P10-PRIV-NEXT: ld r30, 464(r1) # 8-byte Folded Reload ; BE-P10-PRIV-NEXT: ld r29, 456(r1) # 8-byte Folded Reload -; BE-P10-PRIV-NEXT: lfd f26, 576(r1) # 8-byte Folded Reload ; BE-P10-PRIV-NEXT: ld r28, 
448(r1) # 8-byte Folded Reload ; BE-P10-PRIV-NEXT: ld r27, 440(r1) # 8-byte Folded Reload ; BE-P10-PRIV-NEXT: ld r26, 432(r1) # 8-byte Folded Reload -; BE-P10-PRIV-NEXT: lfd f25, 568(r1) # 8-byte Folded Reload ; BE-P10-PRIV-NEXT: ld r25, 424(r1) # 8-byte Folded Reload ; BE-P10-PRIV-NEXT: ld r24, 416(r1) # 8-byte Folded Reload ; BE-P10-PRIV-NEXT: ld r23, 408(r1) # 8-byte Folded Reload -; BE-P10-PRIV-NEXT: lfd f24, 560(r1) # 8-byte Folded Reload ; BE-P10-PRIV-NEXT: ld r22, 400(r1) # 8-byte Folded Reload ; BE-P10-PRIV-NEXT: ld r21, 392(r1) # 8-byte Folded Reload -; BE-P10-PRIV-NEXT: lwz r4, 16(r4) -; BE-P10-PRIV-NEXT: add r3, r4, r3 -; BE-P10-PRIV-NEXT: lfd f23, 552(r1) # 8-byte Folded Reload -; BE-P10-PRIV-NEXT: lfd f22, 544(r1) # 8-byte Folded Reload ; BE-P10-PRIV-NEXT: ld r20, 384(r1) # 8-byte Folded Reload ; BE-P10-PRIV-NEXT: ld r19, 376(r1) # 8-byte Folded Reload ; BE-P10-PRIV-NEXT: ld r18, 368(r1) # 8-byte Folded Reload ; BE-P10-PRIV-NEXT: ld r17, 360(r1) # 8-byte Folded Reload ; BE-P10-PRIV-NEXT: ld r16, 352(r1) # 8-byte Folded Reload ; BE-P10-PRIV-NEXT: ld r15, 344(r1) # 8-byte Folded Reload -; BE-P10-PRIV-NEXT: ld r14, 336(r1) # 8-byte Folded Reload -; BE-P10-PRIV-NEXT: lfd f21, 536(r1) # 8-byte Folded Reload -; BE-P10-PRIV-NEXT: lfd f20, 528(r1) # 8-byte Folded Reload -; BE-P10-PRIV-NEXT: lfd f19, 520(r1) # 8-byte Folded Reload -; BE-P10-PRIV-NEXT: lfd f18, 512(r1) # 8-byte Folded Reload -; BE-P10-PRIV-NEXT: lfd f17, 504(r1) # 8-byte Folded Reload -; BE-P10-PRIV-NEXT: lfd f16, 496(r1) # 8-byte Folded Reload -; BE-P10-PRIV-NEXT: lfd f15, 488(r1) # 8-byte Folded Reload -; BE-P10-PRIV-NEXT: lfd f14, 480(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lwz r4, 16(r4) +; BE-P10-PRIV-NEXT: add r3, r4, r3 ; BE-P10-PRIV-NEXT: clrldi r3, r3, 32 +; BE-P10-PRIV-NEXT: ld r14, 336(r1) # 8-byte Folded Reload ; BE-P10-PRIV-NEXT: addi r1, r1, 624 ; BE-P10-PRIV-NEXT: ld r0, 16(r1) ; BE-P10-PRIV-NEXT: lwz r12, 8(r1) @@ -2416,9 +2416,9 @@ ; LE-P10-NEXT: addi r1, r1, 64 ; 
LE-P10-NEXT: ld r0, 16(r1) ; LE-P10-NEXT: clrldi r3, r3, 32 -; LE-P10-NEXT: mtlr r0 ; LE-P10-NEXT: hashchk r0, -24(r1) ; LE-P10-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; LE-P10-NEXT: mtlr r0 ; LE-P10-NEXT: blr ; LE-P10-NEXT: .LBB2_2: ; LE-P10-NEXT: li r3, 0 @@ -2603,8 +2603,8 @@ ; BE-P10-NEXT: addi r1, r1, 144 ; BE-P10-NEXT: ld r0, 16(r1) ; BE-P10-NEXT: clrldi r3, r3, 32 -; BE-P10-NEXT: mtlr r0 ; BE-P10-NEXT: hashchk r0, -24(r1) +; BE-P10-NEXT: mtlr r0 ; BE-P10-NEXT: blr ; BE-P10-NEXT: .LBB2_2: ; BE-P10-NEXT: li r3, 0 @@ -2688,9 +2688,9 @@ ; LE-P10-PRIV-NEXT: addi r1, r1, 64 ; LE-P10-PRIV-NEXT: ld r0, 16(r1) ; LE-P10-PRIV-NEXT: clrldi r3, r3, 32 -; LE-P10-PRIV-NEXT: mtlr r0 ; LE-P10-PRIV-NEXT: hashchkp r0, -24(r1) ; LE-P10-PRIV-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; LE-P10-PRIV-NEXT: mtlr r0 ; LE-P10-PRIV-NEXT: blr ; LE-P10-PRIV-NEXT: .LBB2_2: ; LE-P10-PRIV-NEXT: li r3, 0 @@ -2776,8 +2776,8 @@ ; BE-P10-PRIV-NEXT: addi r1, r1, 144 ; BE-P10-PRIV-NEXT: ld r0, 16(r1) ; BE-P10-PRIV-NEXT: clrldi r3, r3, 32 -; BE-P10-PRIV-NEXT: mtlr r0 ; BE-P10-PRIV-NEXT: hashchkp r0, -24(r1) +; BE-P10-PRIV-NEXT: mtlr r0 ; BE-P10-PRIV-NEXT: blr ; BE-P10-PRIV-NEXT: .LBB2_2: ; BE-P10-PRIV-NEXT: li r3, 0 @@ -2870,10 +2870,10 @@ ; LE-P10-NEXT: lis r12, -1 ; LE-P10-NEXT: std r30, -16(r1) ; LE-P10-NEXT: mr r30, r1 -; LE-P10-NEXT: ori r12, r12, 0 ; LE-P10-NEXT: std r0, 16(r1) ; LE-P10-NEXT: hashst r0, -32(r1) ; LE-P10-NEXT: clrldi r0, r1, 49 +; LE-P10-NEXT: ori r12, r12, 0 ; LE-P10-NEXT: subc r0, r12, r0 ; LE-P10-NEXT: stdux r1, r1, r0 ; LE-P10-NEXT: std r29, -24(r30) # 8-byte Folded Spill @@ -2890,9 +2890,9 @@ ; LE-P10-NEXT: lwz r3, 20(r29) ; LE-P10-NEXT: lis r4, 0 ; LE-P10-NEXT: ori r4, r4, 65500 +; LE-P10-NEXT: add r4, r1, r4 ; LE-P10-NEXT: stw r3, 32764(r1) ; LE-P10-NEXT: lis r3, 0 -; LE-P10-NEXT: add r4, r1, r4 ; LE-P10-NEXT: ori r3, r3, 32768 ; LE-P10-NEXT: add r3, r1, r3 ; LE-P10-NEXT: bl callee3@notoc @@ -3133,10 +3133,10 @@ ; BE-P10-NEXT: lis r12, -1 ; BE-P10-NEXT: std 
r30, -16(r1) ; BE-P10-NEXT: mr r30, r1 -; BE-P10-NEXT: ori r12, r12, 0 ; BE-P10-NEXT: std r0, 16(r1) ; BE-P10-NEXT: hashst r0, -32(r1) ; BE-P10-NEXT: clrldi r0, r1, 49 +; BE-P10-NEXT: ori r12, r12, 0 ; BE-P10-NEXT: subc r0, r12, r0 ; BE-P10-NEXT: stdux r1, r1, r0 ; BE-P10-NEXT: std r29, -24(r30) # 8-byte Folded Spill @@ -3153,9 +3153,9 @@ ; BE-P10-NEXT: lwz r3, 20(r29) ; BE-P10-NEXT: lis r4, 0 ; BE-P10-NEXT: ori r4, r4, 65500 +; BE-P10-NEXT: add r4, r1, r4 ; BE-P10-NEXT: stw r3, 32764(r1) ; BE-P10-NEXT: lis r3, 0 -; BE-P10-NEXT: add r4, r1, r4 ; BE-P10-NEXT: ori r3, r3, 32768 ; BE-P10-NEXT: add r3, r1, r3 ; BE-P10-NEXT: bl callee3 @@ -3265,10 +3265,10 @@ ; LE-P10-PRIV-NEXT: lis r12, -1 ; LE-P10-PRIV-NEXT: std r30, -16(r1) ; LE-P10-PRIV-NEXT: mr r30, r1 -; LE-P10-PRIV-NEXT: ori r12, r12, 0 ; LE-P10-PRIV-NEXT: std r0, 16(r1) ; LE-P10-PRIV-NEXT: hashstp r0, -32(r1) ; LE-P10-PRIV-NEXT: clrldi r0, r1, 49 +; LE-P10-PRIV-NEXT: ori r12, r12, 0 ; LE-P10-PRIV-NEXT: subc r0, r12, r0 ; LE-P10-PRIV-NEXT: stdux r1, r1, r0 ; LE-P10-PRIV-NEXT: std r29, -24(r30) # 8-byte Folded Spill @@ -3285,9 +3285,9 @@ ; LE-P10-PRIV-NEXT: lwz r3, 20(r29) ; LE-P10-PRIV-NEXT: lis r4, 0 ; LE-P10-PRIV-NEXT: ori r4, r4, 65500 +; LE-P10-PRIV-NEXT: add r4, r1, r4 ; LE-P10-PRIV-NEXT: stw r3, 32764(r1) ; LE-P10-PRIV-NEXT: lis r3, 0 -; LE-P10-PRIV-NEXT: add r4, r1, r4 ; LE-P10-PRIV-NEXT: ori r3, r3, 32768 ; LE-P10-PRIV-NEXT: add r3, r1, r3 ; LE-P10-PRIV-NEXT: bl callee3@notoc @@ -3396,10 +3396,10 @@ ; BE-P10-PRIV-NEXT: lis r12, -1 ; BE-P10-PRIV-NEXT: std r30, -16(r1) ; BE-P10-PRIV-NEXT: mr r30, r1 -; BE-P10-PRIV-NEXT: ori r12, r12, 0 ; BE-P10-PRIV-NEXT: std r0, 16(r1) ; BE-P10-PRIV-NEXT: hashstp r0, -32(r1) ; BE-P10-PRIV-NEXT: clrldi r0, r1, 49 +; BE-P10-PRIV-NEXT: ori r12, r12, 0 ; BE-P10-PRIV-NEXT: subc r0, r12, r0 ; BE-P10-PRIV-NEXT: stdux r1, r1, r0 ; BE-P10-PRIV-NEXT: std r29, -24(r30) # 8-byte Folded Spill @@ -3416,9 +3416,9 @@ ; BE-P10-PRIV-NEXT: lwz r3, 20(r29) ; BE-P10-PRIV-NEXT: lis r4, 0 ; 
BE-P10-PRIV-NEXT: ori r4, r4, 65500 +; BE-P10-PRIV-NEXT: add r4, r1, r4 ; BE-P10-PRIV-NEXT: stw r3, 32764(r1) ; BE-P10-PRIV-NEXT: lis r3, 0 -; BE-P10-PRIV-NEXT: add r4, r1, r4 ; BE-P10-PRIV-NEXT: ori r3, r3, 32768 ; BE-P10-PRIV-NEXT: add r3, r1, r3 ; BE-P10-PRIV-NEXT: bl callee3 diff --git a/llvm/test/CodeGen/PowerPC/scalar-double-ldst.ll b/llvm/test/CodeGen/PowerPC/scalar-double-ldst.ll --- a/llvm/test/CodeGen/PowerPC/scalar-double-ldst.ll +++ b/llvm/test/CodeGen/PowerPC/scalar-double-ldst.ll @@ -3932,9 +3932,9 @@ define dso_local void @st_not_disjoint64_double_uint8_t(i64 %ptr, double %str) { ; CHECK-P10-LABEL: st_not_disjoint64_double_uint8_t: ; CHECK-P10: # %bb.0: # %entry -; CHECK-P10-NEXT: xscvdpuxws f0, f1 ; CHECK-P10-NEXT: pli r4, 232 ; CHECK-P10-NEXT: pli r5, 3567587329 +; CHECK-P10-NEXT: xscvdpuxws f0, f1 ; CHECK-P10-NEXT: rldimi r5, r4, 32, 0 ; CHECK-P10-NEXT: or r3, r3, r5 ; CHECK-P10-NEXT: stxsibx f0, 0, r3 @@ -4371,9 +4371,9 @@ define dso_local void @st_not_disjoint64_double_int8_t(i64 %ptr, double %str) { ; CHECK-P10-LABEL: st_not_disjoint64_double_int8_t: ; CHECK-P10: # %bb.0: # %entry -; CHECK-P10-NEXT: xscvdpsxws f0, f1 ; CHECK-P10-NEXT: pli r4, 232 ; CHECK-P10-NEXT: pli r5, 3567587329 +; CHECK-P10-NEXT: xscvdpsxws f0, f1 ; CHECK-P10-NEXT: rldimi r5, r4, 32, 0 ; CHECK-P10-NEXT: or r3, r3, r5 ; CHECK-P10-NEXT: stxsibx f0, 0, r3 @@ -4814,9 +4814,9 @@ define dso_local void @st_not_disjoint64_double_uint16_t(i64 %ptr, double %str) { ; CHECK-P10-LABEL: st_not_disjoint64_double_uint16_t: ; CHECK-P10: # %bb.0: # %entry -; CHECK-P10-NEXT: xscvdpuxws f0, f1 ; CHECK-P10-NEXT: pli r4, 232 ; CHECK-P10-NEXT: pli r5, 3567587329 +; CHECK-P10-NEXT: xscvdpuxws f0, f1 ; CHECK-P10-NEXT: rldimi r5, r4, 32, 0 ; CHECK-P10-NEXT: or r3, r3, r5 ; CHECK-P10-NEXT: stxsihx f0, 0, r3 @@ -5257,9 +5257,9 @@ define dso_local void @st_not_disjoint64_double_int16_t(i64 %ptr, double %str) { ; CHECK-P10-LABEL: st_not_disjoint64_double_int16_t: ; CHECK-P10: # %bb.0: # %entry -; 
CHECK-P10-NEXT: xscvdpsxws f0, f1 ; CHECK-P10-NEXT: pli r4, 232 ; CHECK-P10-NEXT: pli r5, 3567587329 +; CHECK-P10-NEXT: xscvdpsxws f0, f1 ; CHECK-P10-NEXT: rldimi r5, r4, 32, 0 ; CHECK-P10-NEXT: or r3, r3, r5 ; CHECK-P10-NEXT: stxsihx f0, 0, r3 @@ -5626,9 +5626,9 @@ define dso_local void @st_not_disjoint64_double_uint32_t(i64 %ptr, double %str) { ; CHECK-P10-LABEL: st_not_disjoint64_double_uint32_t: ; CHECK-P10: # %bb.0: # %entry -; CHECK-P10-NEXT: xscvdpuxws f0, f1 ; CHECK-P10-NEXT: pli r4, 232 ; CHECK-P10-NEXT: pli r5, 3567587329 +; CHECK-P10-NEXT: xscvdpuxws f0, f1 ; CHECK-P10-NEXT: rldimi r5, r4, 32, 0 ; CHECK-P10-NEXT: or r3, r3, r5 ; CHECK-P10-NEXT: stfiwx f0, 0, r3 @@ -5958,9 +5958,9 @@ define dso_local void @st_not_disjoint64_double_int32_t(i64 %ptr, double %str) { ; CHECK-P10-LABEL: st_not_disjoint64_double_int32_t: ; CHECK-P10: # %bb.0: # %entry -; CHECK-P10-NEXT: xscvdpsxws f0, f1 ; CHECK-P10-NEXT: pli r4, 232 ; CHECK-P10-NEXT: pli r5, 3567587329 +; CHECK-P10-NEXT: xscvdpsxws f0, f1 ; CHECK-P10-NEXT: rldimi r5, r4, 32, 0 ; CHECK-P10-NEXT: or r3, r3, r5 ; CHECK-P10-NEXT: stfiwx f0, 0, r3 diff --git a/llvm/test/CodeGen/PowerPC/scalar-float-ldst.ll b/llvm/test/CodeGen/PowerPC/scalar-float-ldst.ll --- a/llvm/test/CodeGen/PowerPC/scalar-float-ldst.ll +++ b/llvm/test/CodeGen/PowerPC/scalar-float-ldst.ll @@ -3954,9 +3954,9 @@ define dso_local void @st_not_disjoint64_float_uint8_t(i64 %ptr, float %str) { ; CHECK-P10-LABEL: st_not_disjoint64_float_uint8_t: ; CHECK-P10: # %bb.0: # %entry -; CHECK-P10-NEXT: xscvdpuxws f0, f1 ; CHECK-P10-NEXT: pli r4, 232 ; CHECK-P10-NEXT: pli r5, 3567587329 +; CHECK-P10-NEXT: xscvdpuxws f0, f1 ; CHECK-P10-NEXT: rldimi r5, r4, 32, 0 ; CHECK-P10-NEXT: or r3, r3, r5 ; CHECK-P10-NEXT: stxsibx f0, 0, r3 @@ -4393,9 +4393,9 @@ define dso_local void @st_not_disjoint64_float_int8_t(i64 %ptr, float %str) { ; CHECK-P10-LABEL: st_not_disjoint64_float_int8_t: ; CHECK-P10: # %bb.0: # %entry -; CHECK-P10-NEXT: xscvdpsxws f0, f1 ; CHECK-P10-NEXT: 
pli r4, 232 ; CHECK-P10-NEXT: pli r5, 3567587329 +; CHECK-P10-NEXT: xscvdpsxws f0, f1 ; CHECK-P10-NEXT: rldimi r5, r4, 32, 0 ; CHECK-P10-NEXT: or r3, r3, r5 ; CHECK-P10-NEXT: stxsibx f0, 0, r3 @@ -4836,9 +4836,9 @@ define dso_local void @st_not_disjoint64_float_uint16_t(i64 %ptr, float %str) { ; CHECK-P10-LABEL: st_not_disjoint64_float_uint16_t: ; CHECK-P10: # %bb.0: # %entry -; CHECK-P10-NEXT: xscvdpuxws f0, f1 ; CHECK-P10-NEXT: pli r4, 232 ; CHECK-P10-NEXT: pli r5, 3567587329 +; CHECK-P10-NEXT: xscvdpuxws f0, f1 ; CHECK-P10-NEXT: rldimi r5, r4, 32, 0 ; CHECK-P10-NEXT: or r3, r3, r5 ; CHECK-P10-NEXT: stxsihx f0, 0, r3 @@ -5279,9 +5279,9 @@ define dso_local void @st_not_disjoint64_float_int16_t(i64 %ptr, float %str) { ; CHECK-P10-LABEL: st_not_disjoint64_float_int16_t: ; CHECK-P10: # %bb.0: # %entry -; CHECK-P10-NEXT: xscvdpsxws f0, f1 ; CHECK-P10-NEXT: pli r4, 232 ; CHECK-P10-NEXT: pli r5, 3567587329 +; CHECK-P10-NEXT: xscvdpsxws f0, f1 ; CHECK-P10-NEXT: rldimi r5, r4, 32, 0 ; CHECK-P10-NEXT: or r3, r3, r5 ; CHECK-P10-NEXT: stxsihx f0, 0, r3 @@ -5648,9 +5648,9 @@ define dso_local void @st_not_disjoint64_float_uint32_t(i64 %ptr, float %str) { ; CHECK-P10-LABEL: st_not_disjoint64_float_uint32_t: ; CHECK-P10: # %bb.0: # %entry -; CHECK-P10-NEXT: xscvdpuxws f0, f1 ; CHECK-P10-NEXT: pli r4, 232 ; CHECK-P10-NEXT: pli r5, 3567587329 +; CHECK-P10-NEXT: xscvdpuxws f0, f1 ; CHECK-P10-NEXT: rldimi r5, r4, 32, 0 ; CHECK-P10-NEXT: or r3, r3, r5 ; CHECK-P10-NEXT: stfiwx f0, 0, r3 @@ -5980,9 +5980,9 @@ define dso_local void @st_not_disjoint64_float_int32_t(i64 %ptr, float %str) { ; CHECK-P10-LABEL: st_not_disjoint64_float_int32_t: ; CHECK-P10: # %bb.0: # %entry -; CHECK-P10-NEXT: xscvdpsxws f0, f1 ; CHECK-P10-NEXT: pli r4, 232 ; CHECK-P10-NEXT: pli r5, 3567587329 +; CHECK-P10-NEXT: xscvdpsxws f0, f1 ; CHECK-P10-NEXT: rldimi r5, r4, 32, 0 ; CHECK-P10-NEXT: or r3, r3, r5 ; CHECK-P10-NEXT: stfiwx f0, 0, r3 diff --git a/llvm/test/CodeGen/PowerPC/scalar-i16-ldst.ll 
b/llvm/test/CodeGen/PowerPC/scalar-i16-ldst.ll --- a/llvm/test/CodeGen/PowerPC/scalar-i16-ldst.ll +++ b/llvm/test/CodeGen/PowerPC/scalar-i16-ldst.ll @@ -6304,8 +6304,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwz f0, r4 ; CHECK-P10-NEXT: ori r3, r3, 34463 -; CHECK-P10-NEXT: xscvuxdsp f0, f0 ; CHECK-P10-NEXT: oris r3, r3, 1 +; CHECK-P10-NEXT: xscvuxdsp f0, f0 ; CHECK-P10-NEXT: stfs f0, 0(r3) ; CHECK-P10-NEXT: blr ; @@ -6340,8 +6340,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwz f0, r4 ; CHECK-P10-NEXT: lis r5, -15264 -; CHECK-P10-NEXT: xscvuxdsp f0, f0 ; CHECK-P10-NEXT: and r3, r3, r5 +; CHECK-P10-NEXT: xscvuxdsp f0, f0 ; CHECK-P10-NEXT: pstfs f0, 999990000(r3), 0 ; CHECK-P10-NEXT: blr ; @@ -6692,8 +6692,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwz f0, r4 ; CHECK-P10-NEXT: ori r3, r3, 34463 -; CHECK-P10-NEXT: xscvuxddp f0, f0 ; CHECK-P10-NEXT: oris r3, r3, 1 +; CHECK-P10-NEXT: xscvuxddp f0, f0 ; CHECK-P10-NEXT: stfd f0, 0(r3) ; CHECK-P10-NEXT: blr ; @@ -6728,8 +6728,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwz f0, r4 ; CHECK-P10-NEXT: lis r5, -15264 -; CHECK-P10-NEXT: xscvuxddp f0, f0 ; CHECK-P10-NEXT: and r3, r3, r5 +; CHECK-P10-NEXT: xscvuxddp f0, f0 ; CHECK-P10-NEXT: pstfd f0, 999990000(r3), 0 ; CHECK-P10-NEXT: blr ; @@ -7652,8 +7652,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwa f0, r4 ; CHECK-P10-NEXT: ori r3, r3, 34463 -; CHECK-P10-NEXT: xscvsxdsp f0, f0 ; CHECK-P10-NEXT: oris r3, r3, 1 +; CHECK-P10-NEXT: xscvsxdsp f0, f0 ; CHECK-P10-NEXT: stfs f0, 0(r3) ; CHECK-P10-NEXT: blr ; @@ -7688,8 +7688,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwa f0, r4 ; CHECK-P10-NEXT: lis r5, -15264 -; CHECK-P10-NEXT: xscvsxdsp f0, f0 ; CHECK-P10-NEXT: and r3, r3, r5 +; CHECK-P10-NEXT: xscvsxdsp f0, f0 ; CHECK-P10-NEXT: pstfs f0, 999990000(r3), 0 ; CHECK-P10-NEXT: blr ; @@ -8041,8 +8041,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwa f0, r4 ; CHECK-P10-NEXT: ori r3, r3, 
34463 -; CHECK-P10-NEXT: xscvsxddp f0, f0 ; CHECK-P10-NEXT: oris r3, r3, 1 +; CHECK-P10-NEXT: xscvsxddp f0, f0 ; CHECK-P10-NEXT: stfd f0, 0(r3) ; CHECK-P10-NEXT: blr ; @@ -8077,8 +8077,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwa f0, r4 ; CHECK-P10-NEXT: lis r5, -15264 -; CHECK-P10-NEXT: xscvsxddp f0, f0 ; CHECK-P10-NEXT: and r3, r3, r5 +; CHECK-P10-NEXT: xscvsxddp f0, f0 ; CHECK-P10-NEXT: pstfd f0, 999990000(r3), 0 ; CHECK-P10-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/scalar-i32-ldst.ll b/llvm/test/CodeGen/PowerPC/scalar-i32-ldst.ll --- a/llvm/test/CodeGen/PowerPC/scalar-i32-ldst.ll +++ b/llvm/test/CodeGen/PowerPC/scalar-i32-ldst.ll @@ -6368,8 +6368,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwz f0, r4 ; CHECK-P10-NEXT: ori r3, r3, 34463 -; CHECK-P10-NEXT: xscvuxdsp f0, f0 ; CHECK-P10-NEXT: oris r3, r3, 1 +; CHECK-P10-NEXT: xscvuxdsp f0, f0 ; CHECK-P10-NEXT: stfs f0, 0(r3) ; CHECK-P10-NEXT: blr ; @@ -6404,8 +6404,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwz f0, r4 ; CHECK-P10-NEXT: lis r5, -15264 -; CHECK-P10-NEXT: xscvuxdsp f0, f0 ; CHECK-P10-NEXT: and r3, r3, r5 +; CHECK-P10-NEXT: xscvuxdsp f0, f0 ; CHECK-P10-NEXT: pstfs f0, 999990000(r3), 0 ; CHECK-P10-NEXT: blr ; @@ -6756,8 +6756,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwz f0, r4 ; CHECK-P10-NEXT: ori r3, r3, 34463 -; CHECK-P10-NEXT: xscvuxddp f0, f0 ; CHECK-P10-NEXT: oris r3, r3, 1 +; CHECK-P10-NEXT: xscvuxddp f0, f0 ; CHECK-P10-NEXT: stfd f0, 0(r3) ; CHECK-P10-NEXT: blr ; @@ -6792,8 +6792,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwz f0, r4 ; CHECK-P10-NEXT: lis r5, -15264 -; CHECK-P10-NEXT: xscvuxddp f0, f0 ; CHECK-P10-NEXT: and r3, r3, r5 +; CHECK-P10-NEXT: xscvuxddp f0, f0 ; CHECK-P10-NEXT: pstfd f0, 999990000(r3), 0 ; CHECK-P10-NEXT: blr ; @@ -7430,8 +7430,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwa f0, r4 ; CHECK-P10-NEXT: ori r3, r3, 34463 -; CHECK-P10-NEXT: xscvsxdsp f0, f0 ; CHECK-P10-NEXT: 
oris r3, r3, 1 +; CHECK-P10-NEXT: xscvsxdsp f0, f0 ; CHECK-P10-NEXT: stfs f0, 0(r3) ; CHECK-P10-NEXT: blr ; @@ -7466,8 +7466,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwa f0, r4 ; CHECK-P10-NEXT: lis r5, -15264 -; CHECK-P10-NEXT: xscvsxdsp f0, f0 ; CHECK-P10-NEXT: and r3, r3, r5 +; CHECK-P10-NEXT: xscvsxdsp f0, f0 ; CHECK-P10-NEXT: pstfs f0, 999990000(r3), 0 ; CHECK-P10-NEXT: blr ; @@ -7818,8 +7818,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwa f0, r4 ; CHECK-P10-NEXT: ori r3, r3, 34463 -; CHECK-P10-NEXT: xscvsxddp f0, f0 ; CHECK-P10-NEXT: oris r3, r3, 1 +; CHECK-P10-NEXT: xscvsxddp f0, f0 ; CHECK-P10-NEXT: stfd f0, 0(r3) ; CHECK-P10-NEXT: blr ; @@ -7854,8 +7854,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwa f0, r4 ; CHECK-P10-NEXT: lis r5, -15264 -; CHECK-P10-NEXT: xscvsxddp f0, f0 ; CHECK-P10-NEXT: and r3, r3, r5 +; CHECK-P10-NEXT: xscvsxddp f0, f0 ; CHECK-P10-NEXT: pstfd f0, 999990000(r3), 0 ; CHECK-P10-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/scalar-i64-ldst.ll b/llvm/test/CodeGen/PowerPC/scalar-i64-ldst.ll --- a/llvm/test/CodeGen/PowerPC/scalar-i64-ldst.ll +++ b/llvm/test/CodeGen/PowerPC/scalar-i64-ldst.ll @@ -7006,8 +7006,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprd f0, r4 ; CHECK-P10-NEXT: ori r3, r3, 34463 -; CHECK-P10-NEXT: xscvuxdsp f0, f0 ; CHECK-P10-NEXT: oris r3, r3, 1 +; CHECK-P10-NEXT: xscvuxdsp f0, f0 ; CHECK-P10-NEXT: stfs f0, 0(r3) ; CHECK-P10-NEXT: blr ; @@ -7042,8 +7042,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprd f0, r4 ; CHECK-P10-NEXT: lis r5, -15264 -; CHECK-P10-NEXT: xscvuxdsp f0, f0 ; CHECK-P10-NEXT: and r3, r3, r5 +; CHECK-P10-NEXT: xscvuxdsp f0, f0 ; CHECK-P10-NEXT: pstfs f0, 999990000(r3), 0 ; CHECK-P10-NEXT: blr ; @@ -7413,8 +7413,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprd f0, r4 ; CHECK-P10-NEXT: ori r3, r3, 34463 -; CHECK-P10-NEXT: xscvuxddp f0, f0 ; CHECK-P10-NEXT: oris r3, r3, 1 +; CHECK-P10-NEXT: xscvuxddp f0, f0 ; 
CHECK-P10-NEXT: stfd f0, 0(r3) ; CHECK-P10-NEXT: blr ; @@ -7449,8 +7449,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprd f0, r4 ; CHECK-P10-NEXT: lis r5, -15264 -; CHECK-P10-NEXT: xscvuxddp f0, f0 ; CHECK-P10-NEXT: and r3, r3, r5 +; CHECK-P10-NEXT: xscvuxddp f0, f0 ; CHECK-P10-NEXT: pstfd f0, 999990000(r3), 0 ; CHECK-P10-NEXT: blr ; @@ -7820,8 +7820,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprd f0, r4 ; CHECK-P10-NEXT: ori r3, r3, 34463 -; CHECK-P10-NEXT: xscvsxdsp f0, f0 ; CHECK-P10-NEXT: oris r3, r3, 1 +; CHECK-P10-NEXT: xscvsxdsp f0, f0 ; CHECK-P10-NEXT: stfs f0, 0(r3) ; CHECK-P10-NEXT: blr ; @@ -7856,8 +7856,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprd f0, r4 ; CHECK-P10-NEXT: lis r5, -15264 -; CHECK-P10-NEXT: xscvsxdsp f0, f0 ; CHECK-P10-NEXT: and r3, r3, r5 +; CHECK-P10-NEXT: xscvsxdsp f0, f0 ; CHECK-P10-NEXT: pstfs f0, 999990000(r3), 0 ; CHECK-P10-NEXT: blr ; @@ -8227,8 +8227,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprd f0, r4 ; CHECK-P10-NEXT: ori r3, r3, 34463 -; CHECK-P10-NEXT: xscvsxddp f0, f0 ; CHECK-P10-NEXT: oris r3, r3, 1 +; CHECK-P10-NEXT: xscvsxddp f0, f0 ; CHECK-P10-NEXT: stfd f0, 0(r3) ; CHECK-P10-NEXT: blr ; @@ -8263,8 +8263,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprd f0, r4 ; CHECK-P10-NEXT: lis r5, -15264 -; CHECK-P10-NEXT: xscvsxddp f0, f0 ; CHECK-P10-NEXT: and r3, r3, r5 +; CHECK-P10-NEXT: xscvsxddp f0, f0 ; CHECK-P10-NEXT: pstfd f0, 999990000(r3), 0 ; CHECK-P10-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/scalar-i8-ldst.ll b/llvm/test/CodeGen/PowerPC/scalar-i8-ldst.ll --- a/llvm/test/CodeGen/PowerPC/scalar-i8-ldst.ll +++ b/llvm/test/CodeGen/PowerPC/scalar-i8-ldst.ll @@ -7484,8 +7484,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwz f0, r4 ; CHECK-P10-NEXT: ori r3, r3, 34463 -; CHECK-P10-NEXT: xscvuxdsp f0, f0 ; CHECK-P10-NEXT: oris r3, r3, 1 +; CHECK-P10-NEXT: xscvuxdsp f0, f0 ; CHECK-P10-NEXT: stfs f0, 0(r3) ; CHECK-P10-NEXT: blr ; @@ -7520,8 
+7520,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwz f0, r4 ; CHECK-P10-NEXT: lis r5, -15264 -; CHECK-P10-NEXT: xscvuxdsp f0, f0 ; CHECK-P10-NEXT: and r3, r3, r5 +; CHECK-P10-NEXT: xscvuxdsp f0, f0 ; CHECK-P10-NEXT: pstfs f0, 999990000(r3), 0 ; CHECK-P10-NEXT: blr ; @@ -7872,8 +7872,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwz f0, r4 ; CHECK-P10-NEXT: ori r3, r3, 34463 -; CHECK-P10-NEXT: xscvuxddp f0, f0 ; CHECK-P10-NEXT: oris r3, r3, 1 +; CHECK-P10-NEXT: xscvuxddp f0, f0 ; CHECK-P10-NEXT: stfd f0, 0(r3) ; CHECK-P10-NEXT: blr ; @@ -7908,8 +7908,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwz f0, r4 ; CHECK-P10-NEXT: lis r5, -15264 -; CHECK-P10-NEXT: xscvuxddp f0, f0 ; CHECK-P10-NEXT: and r3, r3, r5 +; CHECK-P10-NEXT: xscvuxddp f0, f0 ; CHECK-P10-NEXT: pstfd f0, 999990000(r3), 0 ; CHECK-P10-NEXT: blr ; @@ -9118,8 +9118,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwa f0, r4 ; CHECK-P10-NEXT: ori r3, r3, 34463 -; CHECK-P10-NEXT: xscvsxdsp f0, f0 ; CHECK-P10-NEXT: oris r3, r3, 1 +; CHECK-P10-NEXT: xscvsxdsp f0, f0 ; CHECK-P10-NEXT: stfs f0, 0(r3) ; CHECK-P10-NEXT: blr ; @@ -9154,8 +9154,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwa f0, r4 ; CHECK-P10-NEXT: lis r5, -15264 -; CHECK-P10-NEXT: xscvsxdsp f0, f0 ; CHECK-P10-NEXT: and r3, r3, r5 +; CHECK-P10-NEXT: xscvsxdsp f0, f0 ; CHECK-P10-NEXT: pstfs f0, 999990000(r3), 0 ; CHECK-P10-NEXT: blr ; @@ -9506,8 +9506,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwa f0, r4 ; CHECK-P10-NEXT: ori r3, r3, 34463 -; CHECK-P10-NEXT: xscvsxddp f0, f0 ; CHECK-P10-NEXT: oris r3, r3, 1 +; CHECK-P10-NEXT: xscvsxddp f0, f0 ; CHECK-P10-NEXT: stfd f0, 0(r3) ; CHECK-P10-NEXT: blr ; @@ -9542,8 +9542,8 @@ ; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mtfprwa f0, r4 ; CHECK-P10-NEXT: lis r5, -15264 -; CHECK-P10-NEXT: xscvsxddp f0, f0 ; CHECK-P10-NEXT: and r3, r3, r5 +; CHECK-P10-NEXT: xscvsxddp f0, f0 ; CHECK-P10-NEXT: pstfd f0, 999990000(r3), 0 ; 
CHECK-P10-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/spill-vec-pair.ll b/llvm/test/CodeGen/PowerPC/spill-vec-pair.ll --- a/llvm/test/CodeGen/PowerPC/spill-vec-pair.ll +++ b/llvm/test/CodeGen/PowerPC/spill-vec-pair.ll @@ -11,34 +11,34 @@ ; CHECK-NEXT: stdu r1, -400(r1) ; CHECK-NEXT: stfd f14, 256(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f15, 264(r1) # 8-byte Folded Spill -; CHECK-NEXT: stxv v20, 64(r1) # 16-byte Folded Spill -; CHECK-NEXT: stxv v21, 80(r1) # 16-byte Folded Spill ; CHECK-NEXT: stfd f16, 272(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f17, 280(r1) # 8-byte Folded Spill -; CHECK-NEXT: stxv v22, 96(r1) # 16-byte Folded Spill -; CHECK-NEXT: stxv v23, 112(r1) # 16-byte Folded Spill ; CHECK-NEXT: stfd f18, 288(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f19, 296(r1) # 8-byte Folded Spill -; CHECK-NEXT: stxv v24, 128(r1) # 16-byte Folded Spill ; CHECK-NEXT: stfd f20, 304(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f21, 312(r1) # 8-byte Folded Spill -; CHECK-NEXT: stxv v25, 144(r1) # 16-byte Folded Spill -; CHECK-NEXT: stxv v26, 160(r1) # 16-byte Folded Spill ; CHECK-NEXT: stfd f22, 320(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f23, 328(r1) # 8-byte Folded Spill -; CHECK-NEXT: stxv v27, 176(r1) # 16-byte Folded Spill ; CHECK-NEXT: stfd f24, 336(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f25, 344(r1) # 8-byte Folded Spill -; CHECK-NEXT: stxv v28, 192(r1) # 16-byte Folded Spill -; CHECK-NEXT: stxv v29, 208(r1) # 16-byte Folded Spill ; CHECK-NEXT: stfd f26, 352(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f27, 360(r1) # 8-byte Folded Spill -; CHECK-NEXT: stxv v30, 224(r1) # 16-byte Folded Spill ; CHECK-NEXT: stfd f28, 368(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f29, 376(r1) # 8-byte Folded Spill -; CHECK-NEXT: stxv v31, 240(r1) # 16-byte Folded Spill ; CHECK-NEXT: stfd f30, 384(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f31, 392(r1) # 8-byte Folded Spill +; CHECK-NEXT: stxv v20, 64(r1) # 16-byte Folded Spill +; CHECK-NEXT: stxv v21, 
80(r1) # 16-byte Folded Spill +; CHECK-NEXT: stxv v22, 96(r1) # 16-byte Folded Spill +; CHECK-NEXT: stxv v23, 112(r1) # 16-byte Folded Spill +; CHECK-NEXT: stxv v24, 128(r1) # 16-byte Folded Spill +; CHECK-NEXT: stxv v25, 144(r1) # 16-byte Folded Spill +; CHECK-NEXT: stxv v26, 160(r1) # 16-byte Folded Spill +; CHECK-NEXT: stxv v27, 176(r1) # 16-byte Folded Spill +; CHECK-NEXT: stxv v28, 192(r1) # 16-byte Folded Spill +; CHECK-NEXT: stxv v29, 208(r1) # 16-byte Folded Spill +; CHECK-NEXT: stxv v30, 224(r1) # 16-byte Folded Spill +; CHECK-NEXT: stxv v31, 240(r1) # 16-byte Folded Spill ; CHECK-NEXT: lxvp vsp34, 0(r3) ; CHECK-NEXT: stxvp vsp34, 32(r1) # 32-byte Folded Spill ; CHECK-NEXT: #APP @@ -84,34 +84,34 @@ ; CHECK-BE-NEXT: stdu r1, -416(r1) ; CHECK-BE-NEXT: stfd f14, 272(r1) # 8-byte Folded Spill ; CHECK-BE-NEXT: stfd f15, 280(r1) # 8-byte Folded Spill -; CHECK-BE-NEXT: stxv v20, 80(r1) # 16-byte Folded Spill -; CHECK-BE-NEXT: stxv v21, 96(r1) # 16-byte Folded Spill ; CHECK-BE-NEXT: stfd f16, 288(r1) # 8-byte Folded Spill ; CHECK-BE-NEXT: stfd f17, 296(r1) # 8-byte Folded Spill -; CHECK-BE-NEXT: stxv v22, 112(r1) # 16-byte Folded Spill -; CHECK-BE-NEXT: stxv v23, 128(r1) # 16-byte Folded Spill ; CHECK-BE-NEXT: stfd f18, 304(r1) # 8-byte Folded Spill ; CHECK-BE-NEXT: stfd f19, 312(r1) # 8-byte Folded Spill -; CHECK-BE-NEXT: stxv v24, 144(r1) # 16-byte Folded Spill ; CHECK-BE-NEXT: stfd f20, 320(r1) # 8-byte Folded Spill ; CHECK-BE-NEXT: stfd f21, 328(r1) # 8-byte Folded Spill -; CHECK-BE-NEXT: stxv v25, 160(r1) # 16-byte Folded Spill -; CHECK-BE-NEXT: stxv v26, 176(r1) # 16-byte Folded Spill ; CHECK-BE-NEXT: stfd f22, 336(r1) # 8-byte Folded Spill ; CHECK-BE-NEXT: stfd f23, 344(r1) # 8-byte Folded Spill -; CHECK-BE-NEXT: stxv v27, 192(r1) # 16-byte Folded Spill ; CHECK-BE-NEXT: stfd f24, 352(r1) # 8-byte Folded Spill ; CHECK-BE-NEXT: stfd f25, 360(r1) # 8-byte Folded Spill -; CHECK-BE-NEXT: stxv v28, 208(r1) # 16-byte Folded Spill -; CHECK-BE-NEXT: stxv v29, 
224(r1) # 16-byte Folded Spill ; CHECK-BE-NEXT: stfd f26, 368(r1) # 8-byte Folded Spill ; CHECK-BE-NEXT: stfd f27, 376(r1) # 8-byte Folded Spill -; CHECK-BE-NEXT: stxv v30, 240(r1) # 16-byte Folded Spill ; CHECK-BE-NEXT: stfd f28, 384(r1) # 8-byte Folded Spill ; CHECK-BE-NEXT: stfd f29, 392(r1) # 8-byte Folded Spill -; CHECK-BE-NEXT: stxv v31, 256(r1) # 16-byte Folded Spill ; CHECK-BE-NEXT: stfd f30, 400(r1) # 8-byte Folded Spill ; CHECK-BE-NEXT: stfd f31, 408(r1) # 8-byte Folded Spill +; CHECK-BE-NEXT: stxv v20, 80(r1) # 16-byte Folded Spill +; CHECK-BE-NEXT: stxv v21, 96(r1) # 16-byte Folded Spill +; CHECK-BE-NEXT: stxv v22, 112(r1) # 16-byte Folded Spill +; CHECK-BE-NEXT: stxv v23, 128(r1) # 16-byte Folded Spill +; CHECK-BE-NEXT: stxv v24, 144(r1) # 16-byte Folded Spill +; CHECK-BE-NEXT: stxv v25, 160(r1) # 16-byte Folded Spill +; CHECK-BE-NEXT: stxv v26, 176(r1) # 16-byte Folded Spill +; CHECK-BE-NEXT: stxv v27, 192(r1) # 16-byte Folded Spill +; CHECK-BE-NEXT: stxv v28, 208(r1) # 16-byte Folded Spill +; CHECK-BE-NEXT: stxv v29, 224(r1) # 16-byte Folded Spill +; CHECK-BE-NEXT: stxv v30, 240(r1) # 16-byte Folded Spill +; CHECK-BE-NEXT: stxv v31, 256(r1) # 16-byte Folded Spill ; CHECK-BE-NEXT: lxvp vsp34, 0(r3) ; CHECK-BE-NEXT: stxvp vsp34, 48(r1) # 32-byte Folded Spill ; CHECK-BE-NEXT: #APP diff --git a/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll b/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll --- a/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll +++ b/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll @@ -70,12 +70,11 @@ ; LE-NEXT: blr ; ; CHECK-P10-LE-LABEL: testCompare1: -; CHECK-P10-LE: .localentry testCompare1, 1 -; CHECK-P10-LE-NEXT: # %bb.0: # %entry +; CHECK-P10-LE: # %bb.0: # %entry ; CHECK-P10-LE-NEXT: plbz r4, testCompare1@PCREL(0), 1 ; CHECK-P10-LE-NEXT: lbz r3, 0(r3) -; CHECK-P10-LE-NEXT: clrlwi r3, r3, 31 ; CHECK-P10-LE-NEXT: clrlwi r4, r4, 31 +; CHECK-P10-LE-NEXT: clrlwi r3, r3, 31 ; CHECK-P10-LE-NEXT: cmplw r4, r3 ; CHECK-P10-LE-NEXT: setbc r3, 
gt ; CHECK-P10-LE-NEXT: b fn2@notoc @@ -122,14 +121,13 @@ ; CHECK-P10-CMP-LE-NEXT: blr ; ; CHECK-P10-CMP-BE-LABEL: testCompare1: -; CHECK-P10-CMP-BE: .localentry testCompare1, 1 -; CHECK-P10-CMP-BE-NEXT: # %bb.0: # %entry +; CHECK-P10-CMP-BE: # %bb.0: # %entry ; CHECK-P10-CMP-BE-NEXT: plbz r4, testCompare1@PCREL(0), 1 ; CHECK-P10-CMP-BE-NEXT: lbz r3, 0(r3) -; CHECK-P10-CMP-BE-NEXT: clrlwi r3, r3, 31 ; CHECK-P10-CMP-BE-NEXT: clrlwi r4, r4, 31 -; CHECK-P10-CMP-BE-NEXT: clrldi r3, r3, 32 +; CHECK-P10-CMP-BE-NEXT: clrlwi r3, r3, 31 ; CHECK-P10-CMP-BE-NEXT: clrldi r4, r4, 32 +; CHECK-P10-CMP-BE-NEXT: clrldi r3, r3, 32 ; CHECK-P10-CMP-BE-NEXT: sub r3, r3, r4 ; CHECK-P10-CMP-BE-NEXT: rldicl r3, r3, 1, 63 ; CHECK-P10-CMP-BE-NEXT: b fn2@notoc diff --git a/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll b/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll --- a/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll +++ b/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll @@ -72,12 +72,11 @@ ; LE-NEXT: blr ; ; CHECK-P10-LE-LABEL: testCompare1: -; CHECK-P10-LE: .localentry testCompare1, 1 -; CHECK-P10-LE-NEXT: # %bb.0: # %entry +; CHECK-P10-LE: # %bb.0: # %entry ; CHECK-P10-LE-NEXT: plbz r4, testCompare1@PCREL(0), 1 ; CHECK-P10-LE-NEXT: lbz r3, 0(r3) -; CHECK-P10-LE-NEXT: clrlwi r3, r3, 31 ; CHECK-P10-LE-NEXT: clrlwi r4, r4, 31 +; CHECK-P10-LE-NEXT: clrlwi r3, r3, 31 ; CHECK-P10-LE-NEXT: cmplw r4, r3 ; CHECK-P10-LE-NEXT: setbc r3, lt ; CHECK-P10-LE-NEXT: b fn2@notoc @@ -124,14 +123,13 @@ ; CHECK-P10-CMP-LE-NEXT: blr ; ; CHECK-P10-CMP-BE-LABEL: testCompare1: -; CHECK-P10-CMP-BE: .localentry testCompare1, 1 -; CHECK-P10-CMP-BE-NEXT: # %bb.0: # %entry +; CHECK-P10-CMP-BE: # %bb.0: # %entry ; CHECK-P10-CMP-BE-NEXT: plbz r4, testCompare1@PCREL(0), 1 ; CHECK-P10-CMP-BE-NEXT: lbz r3, 0(r3) -; CHECK-P10-CMP-BE-NEXT: clrlwi r3, r3, 31 ; CHECK-P10-CMP-BE-NEXT: clrlwi r4, r4, 31 -; CHECK-P10-CMP-BE-NEXT: clrldi r3, r3, 32 +; CHECK-P10-CMP-BE-NEXT: clrlwi r3, r3, 31 ; CHECK-P10-CMP-BE-NEXT: clrldi r4, 
r4, 32 +; CHECK-P10-CMP-BE-NEXT: clrldi r3, r3, 32 ; CHECK-P10-CMP-BE-NEXT: sub r3, r4, r3 ; CHECK-P10-CMP-BE-NEXT: rldicl r3, r3, 1, 63 ; CHECK-P10-CMP-BE-NEXT: b fn2@notoc